def test_dump():
    """Round-trip ``datafile`` through dump_svmlight_file/load_svmlight_file.

    The data is dumped from both its sparse and dense form, with zero- and
    one-based indexing and as float32/float64, then loaded again and compared
    with the original.  The two header comment lines written by the dumper
    are checked as well.
    """
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)

                # BytesIO.readline() returns bytes; decode before the
                # substring checks because ``str in bytes`` raises TypeError
                # on Python 3.
                comment = f.readline().decode("utf-8")
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline().decode("utf-8")
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                # allow a rounding error at the last decimal place
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(
                    Xd.astype(dtype), X2.toarray(), decimals)
                assert_array_equal(y, y2)
def test_dump():
    """Round-trip ``datafile`` through dump_svmlight_file/load_svmlight_file.

    Covers sparse and dense input, zero- and one-based indexing and the
    float32/float64 dtypes, and checks the two header comment lines written
    by the dumper.
    """
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # A comment must be passed explicitly, otherwise the header
                # lines asserted on below are not written at all.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)

                # BytesIO.readline() returns bytes; decode before the
                # substring checks because ``str in bytes`` raises TypeError
                # on Python 3.
                comment = f.readline().decode("utf-8")
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline().decode("utf-8")
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype, zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                # allow a rounding error at the last decimal place
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(
                    Xd.astype(dtype),
                    X2.toarray(),
                    decimals,
                )
                assert_array_equal(y, y2)
def test_dump_concise():
    """Check the exact text dump_svmlight_file writes for a small dataset.

    The expected lines use the shortest number representation; the dump is
    then loaded back and compared with the input.
    """
    one = 1
    two = 2.1
    three = 3.01
    exact = 1.000000000000001
    # loses the last decimal place
    almost = 1.0000000000000001
    X = [
        [one, two, three, exact, almost],
        [1e9, 2e18, 3e27, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]
    y = [one, two, three, exact, almost]

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    # make sure it's using the most concise format possible
    expected_lines = [
        b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n"),
        b("2.1 0:1000000000 1:2e+18 2:3e+27\n"),
        b("3.01 \n"),
        b("1.000000000000001 \n"),
        b("1 \n"),
    ]
    for expected in expected_lines:
        assert_equal(f.readline(), expected)

    # make sure it's correct too :)
    f.seek(0)
    X2, y2 = load_svmlight_file(f)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)
Exemple #4
0
def test_read_inventory_v1():
    """read_inventory_v1 resolves entries of ``inventory_v1`` against /util."""
    stream = BytesIO(inventory_v1)
    # Discard the first line; the reader is handed the remainder.
    stream.readline()
    inventory = read_inventory_v1(stream, '/util', posixpath.join)

    expected_module = ('foo', '1.0', '/util/foo.html#module-module', '-')
    expected_class = ('foo', '1.0', '/util/foo.html#module.cls', '-')
    assert inventory['py:module']['module'] == expected_module
    assert inventory['py:class']['module.cls'] == expected_class
Exemple #5
0
 def test_first_byte_timestamp_updated_on_readline(self):
     """first_byte_timestamp is set by the first readline and then kept."""
     reader = tcp.Reader(BytesIO(b"foobar\nfoobar\nfoobar"))
     reader.readline()
     assert reader.first_byte_timestamp
     first_seen = reader.first_byte_timestamp
     # A second read must not move the timestamp.
     reader.readline()
     assert reader.first_byte_timestamp == first_seen
def test_dump():
    """Round-trip ``datafile`` through dump/load for many X/y variants.

    X is tried sparse, dense and row-sliced; y sparse, dense and sliced;
    with zero-/one-based indexing and float32/float64/int32 dtypes.  The
    header comments, loaded dtype, index ordering and values are checked.
    """
    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    feature_variants = (X_sparse, X_dense, X_sliced)
    label_variants = (y_sparse, y_dense, y_sliced)

    for X in feature_variants:
        for y in label_variants:
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32]:
                    if sp.issparse(y) and y.shape[0] == 1:
                        # make sure y's shape is: (n_samples, n_labels)
                        # when it is sparse
                        y = y.T

                    # we need to pass a comment to get the version info in;
                    # LibSVM doesn't grok comments so they're not put in by
                    # default anymore.
                    f = BytesIO()
                    dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                       zero_based=zero_based)
                    f.seek(0)

                    version_line = str(f.readline(), "utf-8")
                    assert_in("scikit-learn %s" % sklearn.__version__,
                              version_line)

                    base_line = str(f.readline(), "utf-8")
                    assert_in(["one", "zero"][zero_based] + "-based",
                              base_line)

                    X2, y2 = load_svmlight_file(f, dtype=dtype,
                                                zero_based=zero_based)
                    assert_equal(X2.dtype, dtype)
                    assert_array_equal(X2.sorted_indices().indices, X2.indices)

                    X2_dense = X2.toarray()

                    # allow a rounding error at the last decimal place
                    decimals = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(
                        X_dense.astype(dtype), X2_dense, decimals)
                    assert_array_almost_equal(
                        y_dense.astype(dtype), y2, decimals)
def test_dump():
    """Round-trip ``datafile`` through dump/load for sparse, dense, sliced X.

    Each variant is dumped with zero-/one-based indexing and as
    float32/float64/int32, then loaded back; header comments, dtype, index
    ordering and values are checked.
    """
    def as_text(raw):
        # Two-argument str() decodes bytes on Python 3 and fails with
        # TypeError in Python 2.x, where the raw value is used as is.
        try:
            return str(raw, "utf-8")
        except TypeError:
            return raw

    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    Xsliced = Xs[np.arange(Xs.shape[0])]

    for X in (Xs, Xd, Xsliced):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64, np.int32]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test", zero_based=zero_based)
                f.seek(0)

                version_line = as_text(f.readline())
                assert_in("scikit-learn %s" % sklearn.__version__, version_line)

                base_line = as_text(f.readline())
                assert_in(["one", "zero"][zero_based] + "-based", base_line)

                X2, y2 = load_svmlight_file(f, dtype=dtype, zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                assert_array_equal(X2.sorted_indices().indices, X2.indices)

                # allow a rounding error at the last decimal place
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(Xd.astype(dtype), X2.toarray(), decimals)
                assert_array_equal(y, y2)
def test_dump_multilabel():
    """Dense and sparse multilabel targets must produce the same dump."""
    X = [
        [1, 0, 3, 0, 5],
        [0, 0, 0, 0, 0],
        [0, 5, 0, 1, 0],
    ]
    y_dense = [[0, 1, 0], [1, 0, 1], [1, 1, 0]]
    y_sparse = sp.csr_matrix(y_dense)

    # make sure it dumps multilabel correctly
    expected_lines = [
        b("1 0:1 2:3 4:5\n"),
        b("0,2 \n"),
        b("0,1 1:5 3:1\n"),
    ]
    for y in [y_dense, y_sparse]:
        f = BytesIO()
        dump_svmlight_file(X, y, f, multilabel=True)
        f.seek(0)
        for expected in expected_lines:
            assert_equal(f.readline(), expected)
Exemple #9
0
class DummyFile(object):
    """Minimal in-memory file-like object backed by a ``BytesIO``.

    ``data`` is the bytes content exposed through ``read``, ``readline`` and
    iteration.  ``fileno`` always reports ``-1``.
    """

    def __init__(self, data):
        self.buffer = BytesIO(data)

    def read(self, amt=None):
        """Return up to ``amt`` bytes (everything left when ``amt`` is None)."""
        return self.buffer.read(amt)

    def fileno(self):
        """Return -1; there is no underlying descriptor."""
        return -1

    def readline(self):
        """Return the next line, including its newline, or b'' at the end."""
        # The line must be returned: previously it was consumed and dropped,
        # so callers always received None.
        return self.buffer.readline()

    def __iter__(self):
        return self.buffer.__iter__()
Exemple #10
0
def _pull_target(dtuf_main, target, expected_dgsts, expected_sizes, get_info, capfd):
    """Run ``pull-target`` and check the captured output blob by blob.

    :param dtuf_main: base environment mapping passed to ``dtuf.main.doit``.
    :param target: target name to pull.
    :param expected_dgsts: expected SHA-256 hex digests, one per blob.
    :param expected_sizes: expected blob sizes in bytes, one per blob.
    :param get_info: when true, ``DTUF_BLOB_INFO=1`` is added to the
        environment and each blob is expected to be preceded by a
        ``"<digest> <size>\\n"`` line.
    :param capfd: pytest ``capfd`` fixture used to read the output.
    """
    environ = {'DTUF_BLOB_INFO': '1'}
    environ.update(dtuf_main)
    assert dtuf.main.doit(['pull-target', pytest.repo, target], environ if get_info else dtuf_main) == 0
    # pylint: disable=protected-access
    encoding = capfd._capture.out.tmpfile.encoding
    # presumably makes readouterr() hand back undecoded bytes for stdout --
    # TODO confirm against the pytest version in use
    capfd._capture.out.tmpfile.encoding = None
    try:
        out, err = capfd.readouterr()
        if get_info:
            outs = BytesIO(out)
            for i, size in enumerate(expected_sizes):
                assert outs.readline() == expected_dgsts[i].encode('utf-8') + b' ' + str(size).encode('utf-8') + b'\n'
                sha256 = hashlib.sha256()
                sha256.update(outs.read(size))
                assert sha256.hexdigest() == expected_dgsts[i]
            assert len(outs.read()) == 0
        else:
            pos = 0
            for i, size in enumerate(expected_sizes):
                sha256 = hashlib.sha256()
                sha256.update(out[pos:pos + size])
                pos += size
                assert sha256.hexdigest() == expected_dgsts[i]
            assert pos == len(out)
        assert err == ""
    finally:
        # Restore the capture encoding even when an assertion above fails, so
        # a failure here does not leave the capture file altered afterwards.
        capfd._capture.out.tmpfile.encoding = encoding
Exemple #11
0
    def from_bytes(cls, bytestr):
        """Drive a ``ResponseReader`` over ``bytestr`` and return its result.

        The reader is fed from an in-memory stream: each state it reports
        (need a line, need ``amount`` bytes, need a peek) is answered with the
        matching ``Have*`` message until it reports ``M.Complete``.

        Returns ``reader.raw_response``.  Raises ``RuntimeError`` when the
        reader reports a state this loop does not know.
        """
        # TODO: generify
        bio = BytesIO(bytestr)
        reader = ResponseReader()
        state = reader.state
        while True:
            # Completion is tested by identity, the other states by ``.type``.
            if state is M.Complete:
                break
            elif state.type == M.NeedLine.type:
                line = bio.readline()  # TODO: limit?
                next_state = M.HaveLine(value=line)
            elif state.type == M.NeedData.type:
                data = bio.read(state.amount)
                # TODO: can this block or return None if empty etc?
                next_state = M.HaveData(value=data)
            elif state.type == M.NeedPeek.type:
                # NOTE(review): io.BytesIO has no peek() method; if BytesIO
                # here is the stdlib class this branch raises AttributeError
                # -- confirm which BytesIO is imported.
                peeked = bio.peek(state.amount)
                if not peeked:
                    pass  # TODO: again, what happens on end of stream
                # NOTE(review): the peeked bytes are passed as ``amount``,
                # unlike the ``value=`` used by the other messages -- verify.
                next_state = M.HavePeek(amount=peeked)
            else:
                raise RuntimeError('Unknown state %r' % (state,))
            state = reader.send(next_state)

        return reader.raw_response
class ResponseProxy():
    """In-memory copy of a response object that can be re-read and seeked.

    The body of ``req`` is read once (through ``decode_data`` when the
    Content-Encoding header is gzip or deflate, in which case that header is
    removed), ``req`` is closed, and the URL, status code and message are
    copied over.
    """

    def __init__(self, req):
        headers = req.info()
        self._info = headers

        if headers.get('Content-Encoding') in ('gzip', 'deflate'):
            body = decode_data(req)
            # The stored body is already decoded, so drop the header.
            del headers['Content-Encoding']
        else:
            body = req.read()

        self._data = BytesIO(body)
        req.close()
        self.url = req.geturl()
        self.code = req.code
        self.msg = req.msg

    def read(self, *args):
        """Read from the buffered body; arguments go to ``BytesIO.read``."""
        return self._data.read(*args)

    def readline(self):
        """Return the next line of the buffered body."""
        return self._data.readline()

    def info(self):
        """Return the header object taken from the original response."""
        return self._info

    def geturl(self):
        """Return the URL reported by the original response."""
        return self.url

    def close(self):
        """Close the in-memory body buffer."""
        self._data.close()

    def seek(self, idx):
        """Move the read position of the buffered body to ``idx``."""
        self._data.seek(idx)
Exemple #13
0
class DummySocket(object):
    """Socket stand-in: sent data is queued, reads come from a ``BytesIO``."""

    def __init__(self):
        self.queue = []
        self._buffer = BytesIO()
        self.can_read = False

    @property
    def buffer(self):
        """A memoryview over the whole content of the internal buffer."""
        content = self._buffer.getvalue()
        return memoryview(content)

    def advance_buffer(self, amt):
        """Consume ``amt`` bytes from the internal buffer, discarding them."""
        self._buffer.read(amt)

    def send(self, data):
        """Append ``data`` to ``queue``; raise TypeError unless it is bytes."""
        if isinstance(data, bytes):
            self.queue.append(data)
        else:
            raise TypeError()

    def recv(self, l):
        """Read up to ``l`` bytes from the buffer, as a memoryview."""
        received = self._buffer.read(l)
        return memoryview(received)

    def close(self):
        """Do nothing."""
        pass

    def readline(self):
        """Read one line from the buffer, as a memoryview."""
        line = self._buffer.readline()
        return memoryview(line)

    def fill(self):
        """Do nothing."""
        pass
Exemple #14
0
 def test_readlog(self):
     """Logging on tcp.Reader records exactly the bytes read while enabled."""
     reader = tcp.Reader(BytesIO(b"foobar\nfoobar"))
     assert not reader.is_logging()

     reader.start_log()
     assert reader.is_logging()
     reader.readline()
     assert reader.get_log() == b"foobar\n"
     reader.read(1)
     assert reader.get_log() == b"foobar\nf"

     # Starting the log again discards what was recorded so far.
     reader.start_log()
     assert reader.get_log() == b""
     reader.read(1)
     assert reader.get_log() == b"o"

     # Once stopped, asking for the log is an error.
     reader.stop_log()
     tutils.raises(ValueError, reader.get_log)
Exemple #15
0
  def test_seek_cur(self):
    """Relative seeks on CompressedFile must track a plain BytesIO."""
    max_position = len(self.content)
    # Test out of bound, inbound seeking in both directions
    relative_offsets = (-1, 0, 1,
                        len(self.content) // 2,
                        len(self.content) // 2,
                        -1 * len(self.content) // 2)

    for compression_type in [CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                             CompressionTypes.GZIP]:
      file_name = self._create_compressed_file(compression_type, self.content)
      with open(file_name, 'rb') as f:
        compressed_fd = CompressedFile(f, compression_type,
                                       read_size=self.read_block_size)
        reference_fd = BytesIO(self.content)

        for seek_position in relative_offsets:
          compressed_fd.seek(seek_position, os.SEEK_CUR)
          reference_fd.seek(seek_position, os.SEEK_CUR)

          uncompressed_line = compressed_fd.readline()
          expected_line = reference_fd.readline()
          self.assertEqual(uncompressed_line, expected_line)

          # Note: BytesIO's seek() reports out of bound positions (if we seek
          # beyond the file), therefore we need to cap it to max_position (to
          # make it consistent with the old StringIO behavior
          reference_position = min(reference_fd.tell(), max_position)
          uncompressed_position = compressed_fd.tell()
          reference_fd.seek(reference_position, os.SEEK_SET)
          self.assertEqual(uncompressed_position, reference_position)
Exemple #16
0
  def test_seek_set(self):
    """Absolute seeks on CompressedFile must track a plain BytesIO."""
    max_position = len(self.content)
    # Negative seek position argument is not supported for BytesIO with
    # whence set to SEEK_SET.
    absolute_positions = (0, 1,
                          len(self.content)-1, len(self.content),
                          len(self.content) + 1)

    for compression_type in [CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                             CompressionTypes.GZIP]:
      file_name = self._create_compressed_file(compression_type, self.content)
      with open(file_name, 'rb') as f:
        compressed_fd = CompressedFile(f, compression_type,
                                       read_size=self.read_block_size)
        reference_fd = BytesIO(self.content)

        for seek_position in absolute_positions:
          compressed_fd.seek(seek_position, os.SEEK_SET)
          reference_fd.seek(seek_position, os.SEEK_SET)

          uncompressed_line = compressed_fd.readline()
          reference_line = reference_fd.readline()
          self.assertEqual(uncompressed_line, reference_line)

          # Note: BytesIO's tell() reports out of bound positions (if we seek
          # beyond the file), therefore we need to cap it to max_position
          # _CompressedFile.tell() always stays within the bounds of the
          # uncompressed content.
          uncompressed_position = compressed_fd.tell()
          reference_position = min(reference_fd.tell(), max_position)
          self.assertEqual(uncompressed_position, reference_position)
    def actual_process(self, data):
        """Parse the uploaded CSV in ``data`` and return a JSON string.

        ``data['csv']`` holds the raw bytes of the file and
        ``data['columns']`` the column names handed to the reader.

        The return value is ``to_json`` of either the list of parsed rows or
        a ``{'status': <negative int>, 'message': [...]}`` error mapping:
        -2 when the header row cannot be decoded, -5 when there is no header
        row at all, -3 when a row has the wrong number of columns, and -4
        when no data rows were found.
        """
        cols = data['columns']
        csv = BytesIO(data['csv'])  # The file is Bytes, encoded.
        encoding = self.guess_encoding(data['csv'])
        # TODO: Delivery?

        # Guess the dialect from the first line only, then rewind so the
        # reader sees the whole file.
        try:
            dialect = Sniffer().sniff(csv.readline(), [',', '\t'])
        # NOTE(review): ``err`` is not defined in this block; presumably an
        # imported exception class (e.g. the csv error) -- verify.
        except err:
            dialect = excel
        csv.seek(0)
        reader = UnicodeDictReader(csv, cols, encoding=encoding, dialect=dialect)
        profiles = []
        retval = None
        try:
            next(reader)  # Skip the first row (the header)
        except UnicodeDecodeError as e:
            t = guess_content_type(body=data['csv'])[0]
            msg = 'The file is different from what is required. (It '\
                  'appears to be a {0} file.) Please check that  you '\
                  'selected the correct CSV file.'
            m = {'status': -2,
                 'message': [msg.format(t.split('/')[0]), str(e), t]}
            retval = to_json(m)
        except StopIteration:
            msg = 'The file appears to be empty. Please check that you '\
                  'generated the CSV file correctly.'
            m = {'status': -5, 'message': [msg, 'no-rows']}
            retval = to_json(m)
        else:
            # The header was read successfully: collect the data rows.
            rowCount = 0
            for row in reader:
                rowCount += 1
                if len(row) != len(cols):
                    # *Technically* the number of columns in CSV rows can be
                    # arbitary. However, I am enforcing a strict
                    # interpretation for sanity's sake.
                    msg = 'Row {0} had {1} columns, rather than {2}. ' \
                          'Please check the file.'
                    # Name hack.
                    m = {'status': -3,
                         'message': [msg.format(rowCount, len(row),
                                     len(cols))]}
                    retval = to_json(m)
                    # Discard the rows gathered so far; the error wins.
                    profiles = []
                    # --=mpj17=-- I think this is the first time I have used
                    # break in actual code. Wow.
                    break
                profiles.append(row)
        # Only fall through to success / "no rows" when no error was recorded.
        if profiles and (not retval):
            retval = to_json(profiles)
        elif (not profiles) and not(retval):
            msg = 'No rows were found in the CSV file. '\
                  'Please check that  you selected the correct CSV file.'
            m = {'status': -4,
                 'message': [msg, 'no-rows']}
            retval = to_json(m)
        assert retval, 'No retval'
        return retval
Exemple #18
0
 def test_wrap(self):
     """tcp.Reader reads lines from the wrapped stream and proxies attributes."""
     raw = BytesIO(b"foobar\nfoobar")
     raw.flush()
     reader = tcp.Reader(raw)
     first_line = reader.readline()
     assert first_line == b"foobar\n"
     second_line = reader.readline()
     assert second_line == b"foobar"
     # Test __getattr__
     assert reader.isatty
Exemple #19
0
def _decode(raw):
    """As per PEP 263, decode raw data according to the encoding specified in
       the first couple lines of the data, or in ASCII.  Non-ASCII data without
       an encoding specified will cause UnicodeDecodeError to be raised.
    """
    assert type(raw) is bytes  # sanity check

    decl_re = re.compile(br'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')

    def get_declaration(line):
        match = decl_re.match(line)
        if match:
            return match.group(1)
        return None

    encoding = None
    fulltext = b''
    sio = BytesIO(raw)
    for line in (sio.readline(), sio.readline()):
        potential = get_declaration(line)
        if potential is not None:
            if encoding is None:

                # If both lines match, use the first. This matches Python's
                # observed behavior.

                encoding = potential
                munged = b'# encoding set to ' + encoding + b'\n'

            else:

                # But always munge any encoding line. We can't simply remove
                # the line, because we want to preserve the line numbering.
                # However, later on when we ask Python to exec a unicode
                # object, we'll get a SyntaxError if we have a well-formed
                # `coding: # ` line in it.

                munged = b'# encoding NOT set to ' + potential + b'\n'

            line = line.split(b'#')[0] + munged

        fulltext += line
    fulltext += sio.read()
    sio.close()
    encoding = encoding.decode('ascii') if encoding else 'ascii'
    return fulltext.decode(encoding)
Exemple #20
0
 def readline(self, size=None):
     """Return one line from the buffered stream.

     Data is pulled with ``_read_limited`` into ``self.buffer`` until the
     buffer holds a newline, holds ``size`` bytes, or no more data arrives.
     The line (capped at ``size`` bytes when ``size`` is truthy) is returned
     and the remainder stays in ``self.buffer``.
     """
     while True:
         if b"\n" in self.buffer:
             break
         if size is not None and len(self.buffer) >= size:
             break
         # Ask only for what is still missing when a size limit is given.
         wanted = (size - len(self.buffer),) if size else ()
         chunk = self._read_limited(*wanted)
         if not chunk:
             break
         self.buffer += chunk

     stream = BytesIO(self.buffer)
     line = stream.readline(size) if size else stream.readline()
     self.buffer = stream.read()
     return line
Exemple #21
0
 def test_load_physics(self):
     """_load_physics returns the section body and consumes the stream."""
     from io import BytesIO
     fobj = BytesIO(b"""$PhysicalNames
     1
     2 1 "lower"
     $EndPhysicalNames""")
     # Check header.
     header = fobj.readline()
     self.assertEqual(header, b'$PhysicalNames\n')
     # Check physics body.
     parsed = gmsh.Gmsh._load_physics(fobj)
     self.assertEqual(list(parsed.keys()), ['physics'])
     physics = parsed['physics']
     self.assertEqual(len(physics), 2)
     self.assertEqual(physics[0], b'1')
     self.assertEqual(physics[1], b'2 1 "lower"')
     # Check trailing.
     trailing = fobj.readline()
     self.assertEqual(trailing, b'')
Exemple #22
0
class Request(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory, without any socket.

    After construction the parsed request attributes are available, plus
    ``body`` (the text after the first blank line) and ``method`` (an alias
    of ``command``).
    """

    def __init__(self, data):
        # Everything after the first blank line is the body; the unpacking
        # fails with ValueError when the separator is absent.
        _head, body = decode_utf8(data).split('\r\n\r\n', 1)
        self.body = body
        self.rfile = BytesIO(encode_utf8(data))
        self.raw_requestline = self.rfile.readline()
        self.error_code = None
        self.error_message = None
        self.parse_request()
        self.method = self.command
def test_read_inventory_v2():
    """read_inventory_v2 gives the same result for any buffer size."""
    def load(**options):
        # Fresh stream each time; the first line is consumed before the
        # reader gets the rest.
        stream = BytesIO(inventory_v2)
        stream.readline()
        return read_inventory_v2(stream, "/util", posixpath.join, **options)

    invdata1 = load()
    # try again with a small buffer size to test the chunking algorithm
    invdata2 = load(bufsize=5)

    assert invdata1 == invdata2

    modules = invdata1["py:module"]
    assert len(modules) == 2
    assert modules["module1"] == ("foo", "2.0", "/util/foo.html#module-module1", "Long Module desc")
    assert modules["module2"] == ("foo", "2.0", "/util/foo.html#module-module2", "-")
    assert invdata1["py:function"]["module1.func"][2] == "/util/sub/foo.html#module1.func"
    assert invdata1["c:function"]["CFunc"][2] == "/util/cfunc.html#CFunc"
    assert invdata1["std:term"]["a term"][2] == "/util/glossary.html#term-a-term"
Exemple #24
0
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse raw HTTP request bytes without a socket.

    After construction the attributes set by ``parse_request`` (``command``,
    ``path``, ``headers``, ...) are available.  When parsing fails,
    ``error_code`` and ``error_message`` hold the reported error; otherwise
    both are None.
    """

    def __init__(self, request):
        self.rfile = BytesIO(request)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()

    def send_error(self, code, message=None, explain=None):
        """Record the error instead of writing a response.

        The signature mirrors ``BaseHTTPRequestHandler.send_error``:
        ``parse_request`` passes a third ``explain`` argument for over-long
        header lines, which the previous two-argument override rejected with
        TypeError.  ``explain`` is accepted and ignored.
        """
        self.error_code = code
        self.error_message = message
Exemple #25
0
def test_read_inventory_v2():
    """read_inventory_v2 gives the same result for any buffer size."""
    default_stream = BytesIO(inventory_v2)
    default_stream.readline()
    invdata1 = read_inventory_v2(default_stream, '/util', posixpath.join)

    # try again with a small buffer size to test the chunking algorithm
    chunked_stream = BytesIO(inventory_v2)
    chunked_stream.readline()
    invdata2 = read_inventory_v2(chunked_stream, '/util', posixpath.join,
                                 bufsize=5)

    assert invdata1 == invdata2

    modules = invdata1['py:module']
    assert len(modules) == 2

    expected_module1 = ('foo', '2.0', '/util/foo.html#module-module1',
                        'Long Module desc')
    expected_module2 = ('foo', '2.0', '/util/foo.html#module-module2', '-')
    assert modules['module1'] == expected_module1
    assert modules['module2'] == expected_module2

    func_entry = invdata1['py:function']['module1.func']
    assert func_entry[2] == '/util/sub/foo.html#module1.func'
    cfunc_entry = invdata1['c:function']['CFunc']
    assert cfunc_entry[2] == '/util/cfunc.html#CFunc'
Exemple #26
0
		class Request(BaseHTTPRequestHandler):
			"""Parse raw HTTP request bytes without a socket.

			``error_code`` / ``error_message`` hold the error reported by
			``parse_request``, or None when parsing succeeded.
			"""

			# noinspection PyMissingConstructor
			def __init__(self, request_text):
				self.rfile = BytesIO(request_text)
				self.raw_requestline = self.rfile.readline()
				self.error_code = self.error_message = None
				self.parse_request()

			def send_error(self, code, message=None, explain=None):
				"""Record the error instead of writing a response.

				``explain`` mirrors the base-class signature:
				``parse_request`` passes it for over-long header lines, which
				the previous two-argument form rejected with TypeError.  It is
				accepted and ignored.
				"""
				self.error_code = code
				self.error_message = message
Exemple #27
0
def parse_csv_dataset(data, handle_units):
    """Parse CSV bytes into a dict mapping column name to column values.

    The first line is handed to ``parse_csv_header`` to obtain the column
    names and a units mapping; the remaining lines are parsed with
    ``np.genfromtxt``.  Each column is passed through
    ``handle_units(values, unit)``, where ``unit`` is None for columns
    without an entry in the units mapping.
    """
    fobj = BytesIO(data)
    names, units = parse_csv_header(fobj.readline().decode('utf-8'))
    arrs = np.genfromtxt(fobj, dtype=None, names=names, delimiter=',', unpack=True,
                         converters={'date': lambda s: parse_iso_date(s.decode('utf-8'))})
    d = {}
    for f in arrs.dtype.fields:
        dat = arrs[f]
        # Compare with the builtin ``object``: the ``np.object`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        if dat.dtype == object:
            dat = dat.tolist()
        d[f] = handle_units(dat, units.get(f, None))
    return d
Exemple #28
0
 def test_load_periodic(self):
     """_load_periodic returns the section body and consumes the stream."""
     from io import BytesIO
     fobj = BytesIO(b"""$Periodic
     1
     0 1 3
     1
     1 3
     $EndPeriodic""") # a triangle.
     # Check header.
     header = fobj.readline()
     self.assertEqual(header, b'$Periodic\n')
     # Check periodic body.
     parsed = gmsh.Gmsh._load_periodic(fobj)
     self.assertEqual(list(parsed.keys()), ['periodics'])
     periodics = parsed['periodics']
     self.assertEqual(len(periodics), 1)
     entry = periodics[0]
     # Compare through str() of the key-sorted items.
     actual = str([(key, entry[key]) for key in sorted(entry.keys())])
     wanted = str([('mtag', 3), ('ndim', 0),
                   ('nodes', np.array([[1, 3]], dtype='int32')), ('stag', 1)])
     self.assertEqual(actual, wanted)
     # Check trailing.
     trailing = fobj.readline()
     self.assertEqual(trailing, b'')
Exemple #29
0
    def readline(self, size=None):
        """Return one line from the buffered stream.

        ``self.buffer`` is topped up through ``_read_limited`` until it
        contains a newline, reaches ``size`` bytes, or the source yields
        nothing more.  The line (at most ``size`` bytes when ``size`` is
        truthy) is returned; what follows it is kept in ``self.buffer``.
        """
        def buffer_is_short():
            # True while no newline is buffered and the size cap (if any)
            # has not been reached.
            if b'\n' in self.buffer:
                return False
            return size is None or len(self.buffer) < size

        # Keep reading until a newline has been read.
        while buffer_is_short():
            if size:
                # since size is not None here, len(self.buffer) < size
                missing = size - len(self.buffer)
                chunk = self._read_limited(missing)
            else:
                chunk = self._read_limited()

            if not chunk:
                break

            self.buffer += chunk

        stream = BytesIO(self.buffer)
        if size:
            line = stream.readline(size)
        else:
            line = stream.readline()

        # Put the remaining data back into the buffer.
        self.buffer = stream.read()
        return line
    def import_catalog(self, content, hash):
        """Import a tab-separated price catalog from ``content`` (bytes).

        The first row must contain a date (day, month and four-digit year);
        the next two rows are skipped.  Every remaining row updates or
        creates an ``Item`` and links it to the ``PriceCatalog`` identified
        by the date and ``hash``.

        Raises ``CommandError`` when no date is found in the first row.
        """
        data = BytesIO(content)
        first_row = data.readline()
        # Raw bytes literal: ``\d`` and ``\.`` are invalid escape sequences in
        # a plain literal.  The pattern bytes are unchanged.
        date = re.match(rb'[^\d]+(\d{1,2}).(\d{1,2})\.(\d{4}).*', first_row)
        if not date:
            raise CommandError('Incorrect first row')

        day, month, year = date.groups()
        set_date = datetime(int(year), int(month), int(day))
        data.readline()  # Empty line
        data.readline()  # Header line
        catalog, _ = PriceCatalog.objects.get_or_create(
            date=set_date,
            hash=hash,
        )
        for line in data:
            rd = line.decode('utf-8').split('\t')
            defaults = {
                'name': rd[1],
                'brand': rd[2],
            }
            # Columns 3-5 are optional; empty cells are left out of defaults.
            for i, field in enumerate(['bottle_size', 'price', 'price_per_liter']):
                if rd[3 + i]:
                    defaults[field] = float_(rd[3 + i])

            defaults['abv'] = float_(rd[20])
            if defaults.get('price_per_liter') and defaults.get('abv'):
                defaults['price_per_alcohol'] = 100 * defaults['price_per_liter'] / defaults['abv']
            item, _ = Item.objects.update_or_create(
                number=int(rd[0]),
                defaults=defaults
            )
            # Only the link needs to exist; the returned pair is not used.
            PriceCatalogItem.objects.get_or_create(
                price_catalog=catalog,
                item=item,
            )
            print(item)
Exemple #31
0
def test_version_2_0():
    """Arrays whose header needs format 2.0 are written, aligned and read."""
    buf = BytesIO()
    # requires more than 2 byte for header
    field_specs = [(("%d" % i) * 100, float) for i in range(500)]
    arr = np.ones(1000, dtype=field_specs)

    format.write_array(buf, arr, version=(2, 0))
    # Writing without an explicit version is expected to warn.
    with warnings.catch_warnings(record=True) as caught:
        warnings.filterwarnings('always', '', UserWarning)
        format.write_array(buf, arr)
        assert_(caught[0].category is UserWarning)

    # check alignment of data portion
    buf.seek(0)
    header = buf.readline()
    assert_(len(header) % format.ARRAY_ALIGN == 0)

    buf.seek(0)
    loaded = format.read_array(buf)
    assert_array_equal(arr, loaded)

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.write_array, buf, arr, (1, 0))
Exemple #32
0
class TwoWayStringIO(object):
    """File-like object with separate in-memory read and write sides.

    Reads are served from ``readable_data``; writes are collected in their
    own buffer and exposed as ``written_data`` once the object is closed.
    """

    def __init__(self, readable_data):
        self.readable_data = readable_data
        self.read_buffer = BytesIO(readable_data)
        self.write_buffer = BytesIO()
        self.written_data = None
        self.closed = False

    def read(self, *args, **kwargs):
        """Read from the read side; arguments go to ``BytesIO.read``."""
        source = self.read_buffer
        return source.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read a line from the read side."""
        source = self.read_buffer
        return source.readline(*args, **kwargs)

    def write(self, *args, **kwargs):
        """Write to the write side; returns what ``BytesIO.write`` returns."""
        sink = self.write_buffer
        return sink.write(*args, **kwargs)

    def close(self):
        """Mark closed, snapshot the written bytes, close the read side."""
        self.closed = True
        self.written_data = self.write_buffer.getvalue()
        self.read_buffer.close()

    def flush(self):
        """Flush the write side."""
        self.write_buffer.flush()
Exemple #33
0
class FastIO(IOBase):
    """Descriptor-level I/O wrapper that buffers through a ``BytesIO``.

    Reads pull large chunks from the file descriptor with ``os.read`` into
    the buffer; writes go to the buffer and reach the descriptor on
    ``flush``.
    """
    newlines = 0

    def __init__(self, file):
        import os
        self.os = os
        self._fd = file.fileno()
        self.buffer = BytesIO()
        mode = file.mode
        self.writable = "x" in mode or "r" not in mode
        if self.writable:
            self.write = self.buffer.write
        else:
            self.write = None

    def read(self):
        """Read the descriptor until it returns nothing; return unread data."""
        os_mod = self.os
        while True:
            chunk = os_mod.read(
                self._fd, max(os_mod.fstat(self._fd).st_size, BUFSIZE))
            if not chunk:
                break
            # Append at the end of the buffer, then restore the read position.
            position = self.buffer.tell()
            self.buffer.seek(0, 2)
            self.buffer.write(chunk)
            self.buffer.seek(position)
        self.newlines = 0
        return self.buffer.read()

    def readline(self):
        """Return the next line, fetching more data when none is pending."""
        os_mod = self.os
        while self.newlines == 0:
            chunk = os_mod.read(
                self._fd, max(os_mod.fstat(self._fd).st_size, BUFSIZE))
            # An empty read counts as one pending line so the loop ends.
            self.newlines = chunk.count(b"\n") + (not chunk)
            position = self.buffer.tell()
            self.buffer.seek(0, 2)
            self.buffer.write(chunk)
            self.buffer.seek(position)
        self.newlines -= 1
        return self.buffer.readline()

    def flush(self):
        """Write the buffered bytes to the descriptor and reset the buffer."""
        if not self.writable:
            return
        self.os.write(self._fd, self.buffer.getvalue())
        self.buffer.truncate(0)
        self.buffer.seek(0)
Exemple #34
0
class TestUnSeekable:
    """File-like test double that can be read but must never be seeked.

    Every ``read``/``readline``/``readlines`` call is recorded in ``self.log``
    as a ``(method_name, argument)`` tuple so tests can inspect how the
    object was consumed.
    """

    def __init__(self, text):
        """Wrap ``text`` (``str`` is encoded as UTF-8) in an in-memory byte stream."""
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        self._file = BytesIO(text)
        self.log = []

    def tell(self):
        """Return the current position in the underlying stream."""
        return self._file.tell()

    def seek(self, offset, whence=0):
        """Always fail: code under test is not allowed to seek this object.

        :raises AssertionError: unconditionally.
        """
        # Raise explicitly instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would let seek() silently succeed.
        raise AssertionError("seek() called on an unseekable stream")

    def read(self, size=-1):
        """Log the call and read up to ``size`` bytes."""
        self.log.append(("read", size))
        return self._file.read(size)

    def readline(self, size=-1):
        """Log the call and read one line (at most ``size`` bytes)."""
        self.log.append(("readline", size))
        return self._file.readline(size)

    def readlines(self, sizehint=-1):
        """Log the call and read the remaining lines."""
        self.log.append(("readlines", sizehint))
        return self._file.readlines(sizehint)
Exemple #35
0
    def extract_url(self, data, host, port, default_port):
        """Derive the request URL from the first line of ``data``.

        Only the first call does any work; later calls return immediately.
        A ``CONNECT`` line records the tunnel target in ``connect_host`` /
        ``connect_port`` and sets the ``WARC-Proxy-Host`` header. Any other
        verb sets ``self.url``, either from an absolute request target or by
        combining scheme, host, optional port and path.
        """
        if self.first_line_read:
            return
        self.first_line_read = True

        request_line = to_native_str(BytesIO(data).readline(), 'latin-1')
        tokens = request_line.split(' ', 2)
        method, target = tokens[0], tokens[1]

        if method == "CONNECT":
            tunnel_host, has_port, tunnel_port = target.partition(":")
            self.connect_host = tunnel_host
            self.connect_port = int(tunnel_port) if has_port else default_port
            self.warc_headers['WARC-Proxy-Host'] = "https://{0}:{1}".format(host, port)
            return

        # A previously seen CONNECT overrides the host/port passed by the caller.
        host = self.connect_host or host
        port = self.connect_port or port

        if target.startswith(('http:', 'https:')):
            # Absolute-form target: the client is talking to us as a plain proxy.
            self.warc_headers['WARC-Proxy-Host'] = "http://{0}:{1}".format(host, port)
            self.url = target
            return

        scheme = 'http' if default_port != 443 else 'https'
        url = scheme + '://' + host
        # The port is only spelled out when it differs from the default.
        if port != default_port:
            url += ':' + str(port)
        self.url = url + target
Exemple #36
0
class DummySocket(object):
    """In-memory socket stand-in for tests.

    Outgoing data passed to ``send`` is collected in ``queue``; incoming data
    is served from the internal ``_buffer``, and ``_read_counter`` tracks how
    many bytes have been consumed so that ``buffer`` exposes only the rest.
    """

    def __init__(self):
        self.can_read = False
        self._read_counter = 0
        self._buffer = BytesIO()
        self.queue = []

    @property
    def buffer(self):
        """A memoryview over the bytes that have not been consumed yet."""
        unread = self._buffer.getvalue()[self._read_counter:]
        return memoryview(unread)

    def advance_buffer(self, amt):
        """Skip ``amt`` bytes of buffered input."""
        self._read_counter += amt
        self._buffer.read(amt)

    def send(self, data):
        """Queue ``data`` for later inspection; only ``bytes`` is accepted."""
        if isinstance(data, bytes):
            self.queue.append(data)
            return
        raise TypeError()

    def recv(self, l):
        """Consume up to ``l`` bytes and return them as a memoryview."""
        chunk = self._buffer.read(l)
        self._read_counter += len(chunk)
        return memoryview(chunk)

    def close(self):
        """No-op: there is nothing to release."""
        pass

    def readline(self):
        """Consume one line and return it as a memoryview."""
        chunk = self._buffer.readline()
        self._read_counter += len(chunk)
        return memoryview(chunk)

    def fill(self):
        """No-op placeholder."""
        pass
Exemple #37
0
    def from_bytes(cls, bytestr):
        """Drive a ``RequestReader`` over ``bytestr`` and return its raw request.

        The reader is a state machine: each state says what it needs next
        (a line, a fixed amount of data, or a peek) and the matching reply is
        sent back until the reader reports ``M.Complete``.

        :raises RuntimeError: if the reader yields a state of unknown type.
        """
        # NOTE(review): the peek branch calls ``.peek`` on the stream; the
        # standard ``io.BytesIO`` has no such method - confirm which BytesIO
        # this module imports.
        stream = BytesIO(bytestr)
        reader = RequestReader()
        state = reader.state
        while state is not M.Complete:
            kind = state.type
            if kind == M.NeedLine.type:
                # TODO: limit?
                reply = M.HaveLine(value=stream.readline())
            elif kind == M.NeedData.type:
                # TODO: can this block or return None if empty etc?
                reply = M.HaveData(value=stream.read(state.amount))
            elif kind == M.NeedPeek.type:
                # TODO: again, what happens on end of stream
                reply = M.HavePeek(amount=stream.peek(state.amount))
            else:
                raise RuntimeError('Unknown state %r' % (state,))
            state = reader.send(reply)

        return reader.raw_request
Exemple #38
0
    def get(self, path, host=None, headers: dict = None):
        """Send an HTTP/1.0 GET over ``self._stream`` and return the result.

        :param path: request target placed on the request line.
        :param host: value for the ``Host`` header; falls back to ``self._host``.
        :param headers: optional extra request headers. The mapping is copied,
            so the caller's dict is never modified.
        :return: ``(status, body)`` where ``status`` is the integer status code
            and ``body`` is the payload, decompressed when the response
            declares ``deflate`` or ``gzip`` content encoding.
        """
        # Copy before adding Host: the original code wrote into the dict the
        # caller passed in, leaking the Host header back into caller state.
        request_headers = dict(headers) if headers else {}
        host = host or self._host
        if host:
            request_headers['Host'] = host
        headers_str = '\r\n'.join(f'{key}: {val}'
                                  for (key, val) in request_headers.items())
        http_query = f'GET {path} HTTP/1.0\r\n{headers_str}\r\n\r\n'
        self._stream.send(http_query.encode())

        raw_response = recv_all(self._stream)
        # Everything before the first blank line is the status line + headers.
        header, body = raw_response.split(b'\r\n\r\n', 1)

        f = BytesIO(header)
        status_line = f.readline().split(b' ')
        _protocol, status = status_line[:2]

        response_headers = parse_headers(f)
        encoding = response_headers['Content-Encoding']
        if encoding == 'deflate':
            body = zlib.decompress(body)
        elif encoding == 'gzip':
            body = gzip.decompress(body)

        return int(status), body
 def _build_HTTPMessage(self, raw_headers):
     """Parse a raw response head and return the parsed message's ``get`` method.

     ``raw_headers`` starts with the status line, which is discarded before
     the remaining lines are handed to ``parse_headers``.
     """
     stream = BytesIO(raw_headers)
     # Get rid of the status line
     stream.readline()
     return parse_headers(stream).get
Exemple #40
0
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request and replay it through a ``requests`` session.

    The raw bytes are parsed with ``BaseHTTPRequestHandler.parse_request``;
    the body, cookies and target URL are then derived from the parsed
    headers. ``replaceString`` edits the raw request and keeps the
    Content-Length header in step, and ``send`` replays the request.
    """

    # HTTP verbs that send() knows how to replay.
    _SUPPORTED_COMMANDS = ("GET", "POST", "HEAD", "PUT", "DELETE")

    def __init__(self, reqFile=None, **kwargs):
        """Create the request, optionally loading it from the file ``reqFile``.

        :param reqFile: path of a file holding a raw HTTP request, or None.
        :param kwargs: forwarded to ``Connection.Connection``; ``session`` may
            supply an existing session, otherwise a new one is created.
        """
        # Anchored and case-insensitive so only a real Content-Length header
        # line matches (the old ``[c|C]`` class also accepted a literal '|').
        self.regexContentLengthPattern = re.compile(
            r"^\s*content-length:\s*([0-9]+)\s*", re.IGNORECASE)
        self.session = kwargs.get('session', None)
        if self.session is None:
            self.session = requests.Session()
        self.instanciateConnection(**kwargs)
        if reqFile is not None:
            with open(reqFile, 'rb') as requestFile:
                self.updateRequestFromRawValue(requestFile.read())

    #instanciate the request from raw
    def updateRequestFromRawValue(self, raw):
        """(Re)parse the request from the raw bytes ``raw``."""
        self.rfile = BytesIO(raw)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        #from BaseHTTPRequesetHandler class
        self.parse_request()
        #the above function does not parse request body
        self.parseRequestBody()
        #transfer cookies (from requestFile) from header to Cookie param
        self.parseCookiesFromHeaders()
        #response filled later
        self.response = None
        self.addURLToRequest()

    def instanciateConnection(self, **kwargs):
        """Build the ``Connection`` object holding proxy and TLS settings."""
        self.connection = Connection.Connection(**kwargs)

    def printFields(self):
        """Print the parsed request fields for debugging."""
        print("Error Code: ", self.error_code)  # None  (check this first)
        print("Command: ", self.command)  # "GET"
        print("Path: ", self.path)  # "/who/ken/trust.html"
        print("Nb of Headers: ", len(self.headers))  # 3
        print("Header keys: ",
              self.headers.keys())  # ['accept-charset', 'host', 'accept']
        print("URL: ", self.URL)
        print(self.cookies.output(attrs=[], header="Cookie:"))
        if self.command == "POST":
            print("Post body.:", self.postBody)
        print("rfile:", self.rfile.getvalue())

    # create the URL from host, path and protocol
    def addURLToRequest(self):
        """Set ``self.URL`` from the Host header, the path and the TLS flag."""
        scheme = "https://" if self.connection.isTLS else "http://"
        self.URL = scheme + self.headers['host'] + self.path

    def parseRequestBody(self):
        """Read the body announced by Content-Length into ``self.postBody``.

        ``postBody`` is None when the request has no Content-Length header.
        """
        # Header lookup on the parsed message is case-insensitive, so one
        # lookup covers every capitalisation of the header name.
        declared = self.headers.get('content-length')
        if declared is None:
            self.postBody = None
        else:
            # Content-Length is plain decimal; base 0 would reject "007"
            # and accept "0x10".
            self.postBody = self.rfile.read(int(declared))

    def _locateContentLengthHeader(self):
        """Find the Content-Length line in the header section of the raw request.

        :return: ``(offset, raw_line, match)`` or None when the header
            section ends (blank line or end of data) without a match.
        """
        #return to the beginning
        self.rfile.seek(0)
        #the first line contain the command
        self.rfile.readline()
        while True:
            offset = self.rfile.tell()
            rawLine = self.rfile.readline()
            # Stop at the blank line ending the headers so body content is
            # never mistaken for a header.
            if not rawLine.strip():
                return None
            match = self.regexContentLengthPattern.search(
                str(rawLine, 'iso-8859-1'))
            if match:
                return offset, rawLine, match

    def updateContentLengthInRawReq(self, newLength):
        """Rewrite the Content-Length header of a POST request and re-parse it."""
        if self.command != "POST":
            return
        located = self._locateContentLengthHeader()
        if located is None:
            return
        offset, rawLine, _ = located
        raw = self.rfile.getvalue()
        newLine = bytes("content-length: " + str(newLength) + " \r\n", 'utf-8')
        # Splice by byte offset so only the header line itself is replaced.
        self.updateRequestFromRawValue(
            raw[:offset] + newLine + raw[offset + len(rawLine):])

    def getContentLengthInRawReq(self):
        """Return the Content-Length of a POST request as an int, else None."""
        if self.command != "POST":
            return None
        located = self._locateContentLengthHeader()
        if located is None:
            return None
        return int(located[2].group(1))

    def parseCookiesFromHeaders(self):
        """Move Cookie headers into ``self.cookies`` and drop them from the headers."""
        #add to the cookie param
        self.cookies = http.cookies.SimpleCookie()
        # load() expects the header *value* only; prefixing it with
        # "Cookie: " makes the parser reject the whole string.
        for value in self.headers.get_all('cookie', []):
            self.cookies.load(value)
        #remove the cookie from the (normal) header
        del self.headers['cookie']

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of writing a response to a socket.

        ``explain`` is accepted because ``parse_request`` may pass it.
        """
        self.error_code = code
        self.error_message = message

    #to be optimized later (the whole request is put to memory then replace then re-write, not so efficient)
    def replaceString(self, strToBeDel, strToBePst):
        """Replace ``strToBeDel`` with ``strToBePst`` everywhere in the raw request.

        When the request carries a Content-Length header (POST only), it is
        adjusted by the byte-size change of the replacements made in the body.
        """
        text = str(self.rfile.getvalue(), 'utf-8')
        if strToBeDel not in text:
            return
        # Only replacements inside the body change the body length.
        sections = re.split(r'\r?\n\r?\n', text, maxsplit=1)
        body = sections[1] if len(sections) > 1 else ''
        occurrences = body.count(strToBeDel)

        self.updateRequestFromRawValue(
            bytes(text.replace(strToBeDel, strToBePst), 'utf-8'))
        previousContLeng = self.getContentLengthInRawReq()
        if previousContLeng is None:
            # No Content-Length to maintain (e.g. a GET request).
            return
        #for now only utf-8 is supported, so the length should be in bytes here.
        delta = occurrences * (len(strToBePst.encode('utf-8'))
                               - len(strToBeDel.encode('utf-8')))
        self.updateContentLengthInRawReq(previousContLeng + delta)

    def send(self):
        """Replay the request through the session and store the response.

        Sets ``self.response`` and ``self.elapsed`` (seconds).

        :raises ValueError: if the request verb is not one of GET, POST,
            HEAD, PUT or DELETE.
        """
        if self.command not in self._SUPPORTED_COMMANDS:
            raise ValueError("Unsupported HTTP command: %r" % (self.command,))

        #cookies must be formatted for Requests lib
        cookies = {key: morsel.value for key, morsel in self.cookies.items()}

        requestArgs = {
            'headers': self.headers,
            'proxies': self.connection.proxies,
            'cookies': cookies,
            'verify': self.connection.verifyTLSCert,
        }
        # GET is the only verb replayed without a body.
        if self.command != "GET":
            requestArgs['data'] = self.postBody

        sender = getattr(self.session, self.command.lower())
        self.response = sender(self.URL, **requestArgs)
        self.elapsed = self.response.elapsed.total_seconds()
Exemple #41
0
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse raw request bytes without a socket and expose helper accessors.

    Besides the standard request line and headers, the request text may
    contain annotation lines of the form ``:<field> <key>: <value>`` which
    ``extract_fields`` separates from the rest of the payload.
    """
    # http://stackoverflow.com/a/5955949

    scheme = 'http'

    # noinspection PyMissingConstructor
    def __init__(self, request_bytes, scheme):
        assert isinstance(request_bytes, bytes)

        self.scheme = scheme
        self.error_code = self.error_message = None
        self.rfile = BytesIO(request_bytes)
        self.raw_requestline = self.rfile.readline()
        self.parse_request()

        if self.error_message:
            raise Exception(self.error_message)

        # Replace headers with simple dict to coup differences in Py2 and Py3
        parsed_headers = getattr(self, 'headers', {})
        self.headers = capitalize_keys(dict(parsed_headers))

    def send_error(self, code, message=None, explain=None):
        """Record the parse error instead of writing a response."""
        self.error_code = code
        self.error_message = message

    def extract_fields(self, field=None, available_fields=None):
        """Split the unread request lines into annotation fields and the rest.

        :param field: field name to extract, or None for all available fields.
        :param available_fields: allowed names; defaults to ``AVAILABLE_FIELDS``.
        :return: ``(fields, remaining)`` where ``fields`` is a list of
            ``(field, key, value)`` tuples and ``remaining`` is a ``BytesIO``
            with the non-matching lines. The read position of ``self.rfile``
            is restored afterwards.
        :raises ValueError: if ``field`` is not an available field.
        """
        if available_fields is None:
            available_fields = AVAILABLE_FIELDS

        if field is not None and field not in available_fields:
            raise ValueError(
                "Unexpected field '{}'. Expected one of {}.".format(
                    field, ', '.join(available_fields)))

        wanted = '|'.join(available_fields) if field is None else field
        line_pattern = r':({}) (.+): (.+)'.format(wanted)

        start = self.rfile.tell()
        matched = []
        leftovers = []
        for raw_line in self.rfile.readlines():
            text = raw_line.decode('utf-8')
            hit = re.match(line_pattern, text)
            if hit is None:
                leftovers.append(text)
            else:
                matched.append(tuple(part.strip() for part in hit.groups()))
        # Leave the stream where it was so repeated calls see the same lines.
        self.rfile.seek(start)

        remaining = BytesIO('\n'.join(leftovers).encode('utf-8').strip())
        remaining.seek(0)

        return (matched, remaining)

    def auth(self):
        """Return ``(method, credentials)`` from the Authorization header.

        ``(None, None)`` is returned when the header is missing or does not
        split into exactly two parts. Basic credentials are base64-decoded to
        text; any other token is returned as bytes.
        """
        try:
            method, token = self.headers.get('Authorization').split()
        except (AttributeError, KeyError, ValueError):
            return None, None
        token_bytes = token if isinstance(token, bytes) else token.encode('utf-8')
        if method != 'Basic':
            return method, token_bytes
        return method, base64.b64decode(token_bytes).decode('utf-8')

    def url(self):
        """Return the request URL, including any ``query`` annotation fields."""
        base_url = '{}://{}{}'.format(self.scheme,
                                      self.headers.get('Host', 'nohost'),
                                      self.path)

        query_fields, _ = self.extract_fields('query')
        query = [(key, value) for _, key, value in query_fields]

        return add_url_params(base_url, query) if query else base_url

    def data(self):
        """Return the payload: decoded JSON for JSON content, raw bytes otherwise.

        Returns None when there is no payload.
        """
        _, remaining = self.extract_fields(None)
        payload = remaining.read()
        if not payload:
            return None
        if not is_json(self.headers.get('Content-Type', '')):
            return payload
        assert isinstance(payload, bytes)
        return ordered(json.loads(payload.decode('utf-8')))
Exemple #42
0
class HttpRequest(object):
    """A basic HTTP request."""

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Upload handlers; filled lazily from settings by the upload_handlers property.
    _upload_handlers = []

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self._post_parse_error = False
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        """Return ``<ClassName: METHOD 'path'>``, or ``<ClassName>`` when the
        method or full path is not set."""
        if self.method is None or not self.get_full_path():
            return force_str('<%s>' % self.__class__.__name__)
        return force_str(
            '<%s: %s %r>' % (self.__class__.__name__, self.method, force_str(self.get_full_path()))
        )

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and (
                'HTTP_X_FORWARDED_HOST' in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = self.get_port()
            # Only append the port when it is not the default for the scheme.
            if server_port != ('443' if self.is_secure() else '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """Return the HTTP host using the environment or request headers.

        Raises DisallowedHost when DEBUG is off and the host's domain does
        not validate against settings.ALLOWED_HOSTS.
        """
        host = self._get_raw_host()

        # There is no hostname validation when DEBUG=True
        if settings.DEBUG:
            return host

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, settings.ALLOWED_HOSTS):
            return host
        else:
            msg = "Invalid HTTP_HOST header: %r." % host
            if domain:
                msg += " You may need to add %r to ALLOWED_HOSTS." % domain
            else:
                msg += " The domain name provided is not valid according to RFC 1034/1035."
            raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
            port = self.META['HTTP_X_FORWARDED_PORT']
        else:
            port = self.META['SERVER_PORT']
        return str(port)

    def get_full_path(self, force_append_slash=False):
        """Return the escaped path plus the query string, if there is one.

        With ``force_append_slash`` a trailing slash is added to the path
        when it does not already end with one.
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s%s' % (
            escape_uri_path(self.path),
            '/' if force_append_slash and not self.path.endswith('/') else '',
            ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
        )

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Attempts to return a signed cookie. If the signature fails or the
        cookie has expired, raises an exception... unless you provide the
        default argument in which case that value will be returned instead.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer salt combines the cookie name with the caller's salt.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        return '{scheme}://{host}{path}'.format(
            scheme=self.scheme,
            host=self._get_raw_host(),
            path=self.get_full_path(),
        )

    def build_absolute_uri(self, location=None):
        """
        Builds an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, the absolute URI is
        built on ``request.get_full_path()``. Anyway, if the location is
        absolute, it is simply converted to an RFC 3987 compliant URI and
        returned and if location is relative or is scheme-relative (i.e.,
        ``//example.com/``), it is urljoined to a base URL constructed from the
        request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        bits = urlsplit(location)
        if not (bits.scheme and bits.netloc):
            current_uri = '{scheme}://{host}{path}'.format(scheme=self.scheme,
                                                           host=self.get_host(),
                                                           path=self.path)
            # Join the constructed URL with the provided location, which will
            # allow the provided ``location`` to apply query strings to the
            # base path as well as override the host, if it begins with //
            location = urljoin(current_uri, location)
        return iri_to_uri(location)

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Returns 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """Return 'https' when the SECURE_PROXY_SSL_HEADER setting matches the
        request META, otherwise whatever ``_get_scheme()`` returns."""
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            if self.META.get(header) == value:
                return 'https'
        return self._get_scheme()

    def is_secure(self):
        """Return True when the request scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """Return True when the X-Requested-With header equals 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        """The encoding used for GET/POST data (None means the default)."""
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Sets the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, it is removed and recreated on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, '_get'):
            del self._get
        if hasattr(self, '_post'):
            del self._post

    def _initialize_handlers(self):
        """Load one upload handler per entry in settings.FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [uploadhandler.load_handler(handler, self)
                                 for handler in settings.FILE_UPLOAD_HANDLERS]

    @property
    def upload_handlers(self):
        """The upload handlers, initialized from settings on first access."""
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        # The presence of _files means the upload has already been parsed.
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list before parsing starts.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning="You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """Return the raw request body, reading and caching it on first access.

        Raises RawPostDataException if the stream was already read through
        ``read``/``readline`` before the body was cached; an IOError while
        reading is re-raised as UnreadablePostError.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException("You cannot access body after reading from request's data stream")
            try:
                self._body = self.read()
            except IOError as e:
                six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
            # Point the stream at the cached bytes so later reads see the same data.
            self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Set empty POST/FILES containers and flag that parsing failed."""
        self._post = QueryDict('')
        self._files = MultiValueDict()
        self._post_parse_error = True

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        if self.method != 'POST':
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed without caching the body: nothing left to parse.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.content_type == 'multipart/form-data':
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                data = self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                # Mark that an error occurred. This allows self.__repr__ to
                # be explicit about it instead of simply representing an
                # empty POST
                self._mark_post_parse_error()
                raise
        elif self.content_type == 'application/x-www-form-urlencoded':
            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()
        else:
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()

    def close(self):
        """Close every uploaded file held in self._files, if any were parsed."""
        if hasattr(self, '_files'):
            for f in chain.from_iterable(l[1] for l in self._files.lists()):
                f.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """Read from the request stream, marking the stream as read.

        An IOError from the stream is re-raised as UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.read(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])

    def readline(self, *args, **kwargs):
        """Read one line from the request stream, marking the stream as read.

        An IOError from the stream is re-raised as UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.readline(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])

    def xreadlines(self):
        """Yield lines from the request stream until an empty read."""
        while True:
            buf = self.readline()
            if not buf:
                break
            yield buf

    # Iterating over the request yields its lines.
    __iter__ = xreadlines

    def readlines(self):
        """Return all remaining lines of the request stream as a list."""
        return list(iter(self))
Exemple #43
0
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse raw HTTP request bytes without a socket.

    After construction the usual handler attributes (``command``, ``path``,
    ``headers``, ...) are available. On a malformed request ``error_code``
    and ``error_message`` are set instead.
    """

    def __init__(self, request_text):
        """Parse ``request_text`` (bytes holding a complete raw request)."""
        self.rfile = BytesIO(request_text)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of writing a response.

        Without this override ``parse_request`` would call the base
        implementation, which needs a live client connection and fails on
        this socket-less object.
        """
        self.error_code = code
        self.error_message = message
    def nntp_server(self):
        """
        A fake nntp server that generates responses like a real one

        It lets us test the protocol by simulating different responses.

        The welcome banner is sent once per connection; afterwards the loop
        buffers incoming socket data, takes one line at a time from the
        buffer, passes it to ``self.put`` and sends the response back. The
        method returns when the connection is lost, the peer stops sending
        data, or the server is no longer active.
        """

        # Set io_wait flag
        self._io_wait.set()

        # Send Welcome Message
        if not self.sent_welcome:
            welcome_str = "200 l2g.caronc.dummy NNRP Service Ready"
            if self._can_post:
                welcome_str += " (posting ok)"
            if self._has_yenc:
                welcome_str += " (yEnc enabled)"
            try:
                self.socket.send(welcome_str + NNTP_EOD)
            except Exception:
                # connection lost; catching Exception (not a bare except)
                # lets KeyboardInterrupt/SystemExit propagate
                return

            self.sent_welcome = True

        data = BytesIO()
        d_len = data.tell()

        while self._active.is_set() and self.socket.connected:
            # ptr manipulation
            d_ptr = data.tell()
            if d_ptr > 32768:
                # Truncate: keep only the unread tail in a fresh buffer
                data = BytesIO(data.read())
                d_ptr = 0
                data.seek(d_ptr)

            try:
                pending = self.socket.can_read(0.8)
                if not pending:
                    # None (no more data) or nothing pending; back to io_wait
                    continue

                while self.socket.can_read():
                    _data = self.socket.read()
                    if not _data:
                        # Reset our settings to prepare for another connection
                        self.reset()
                        return

                    # Buffer response
                    # NOTE(review): this writes at the current read position,
                    # not at the end of the buffer, so unread lines left over
                    # from an earlier read appear to be overwritten - confirm
                    # whether clients ever send more than one line at a time.
                    data.write(_data)
                    d_len = data.tell()

            except (socket.error, SocketException):
                # Socket Issue
                # Reset our sent_welcome flag
                self.sent_welcome = False
                return

            # Nothing new was buffered since the last line was consumed
            if d_ptr == d_len:
                continue
            data.seek(d_ptr)

            # Acquire our line
            line = data.readline()

            # Build our response
            response = self.put(line)

            # Return it on the socket
            try:
                self.socket.send(response + NNTP_EOD)
            except Exception:
                # connection lost
                return
Exemple #45
0
    def raster(
            self,
            inputs,
            bands=None,
            scales=None,
            data_type=None,
            output_format='GTiff',
            srs=None,
            dimensions=None,
            resolution=None,
            bounds=None,
            bounds_srs=None,
            cutline=None,
            place=None,
            align_pixels=False,
            resampler=None,
            dltile=None,
            save=False,
            outfile_basename=None,
            **pass_through_params
    ):
        """Request a translated and warped mosaic of the given
        :class:`Metadata <descarteslabs.services.Metadata>` identifiers and
        return it as image file data.

        :param inputs: List of :class:`Metadata` identifiers.
        :param bands: List of requested bands. When the last entry is an alpha
            band (data range `[0, 1]`) it controls how all other bands are
            rastered: multiple images are combined image-by-image only where
            each image's alpha band is `1`; pixels whose alpha is not `1` are
            "transparent" where images overlap. A pixel that is fully masked
            across all combined alpha bands is `0` in every non-alpha band.
        :param scales: List of tuples, one per band, describing the scaling to
            apply. A 4-tuple is ``(src_min, src_max, out_min, out_max)``:
            source values between ``src_min`` and ``src_max`` are scaled to the
            range ``out_min`` to ``out_max``. A 2-tuple ``(src_min, src_max)``
            is also accepted and implies the output range ``(0, 255)`` (handy
            for the common ``Byte`` output type). Use ``None`` for a band that
            should not be scaled. The format and behaviour match GDAL's scales
            during translation. Example: ``[(0, 10000, 0, 127), None, (0, 10000)]``
            scales band one from 0-10000 to 0-127, leaves band two untouched
            and scales band three from 0-10000 to 0-255.
        :param str data_type: Output data type (one of ``Byte``, ``UInt16``,
            ``Int16``, ``UInt32``, ``Int32``, ``Float32``, ``Float64``).
        :param str output_format: Output format (one of ``GTiff``, ``PNG``,
            ``JPEG``).
        :param str srs: Output spatial reference system definition understood
            by GDAL.
        :param tuple dimensions: Desired output (width, height) in pixels.
            Incompatible with `resolution`.
        :param float resolution: Desired resolution in output SRS units.
            Incompatible with `dimensions`.
        :param tuple bounds: ``(min_x, min_y, max_x, max_y)`` in target SRS.
        :param str bounds_srs: Override the coordinate system in which bounds
            are expressed.
        :param str cutline: A GeoJSON feature or geometry to be used as a
            cutline.
        :param str place: A slug identifier to be used as a cutline. When
            given, the place's geometry replaces any `cutline` argument.
        :param bool align_pixels: Align pixels to the target coordinate system.
        :param str resampler: Resampling algorithm to be used during warping
            (``near``, ``bilinear``, ``cubic``, ``cubicspline``, ``lanczos``,
            ``average``, ``mode``, ``max``, ``min``, ``med``, ``q1``, ``q3``).
        :param str dltile: A dltile key (or a dltile dictionary carrying the key
            under ``['properties']['key']``) used to specify the resolution,
            bounds, and srs.
        :param bool save: Write resulting files to disk. Default: False
        :param str outfile_basename: If 'save' is True, override default
            filename using this string as a base.
        :param pass_through_params: Extra request parameters merged into the
            request as-is; they override the named parameters above on a key
            clash.

        :return: A dictionary with two keys, ``files`` and ``metadata``.
            ``files`` maps file names to the binary data of each file (at the
            moment there is always exactly one file, with the extension that
            matches the requested ``output_format``). ``metadata`` holds
            details about the raster operation; they help with debugging but
            should not otherwise be relied on (no key is guaranteed to exist).
        """
        cutline = as_json_string(cutline)

        if place:
            # Look the place up with this client's credentials and use its
            # low-resolution geometry as the cutline.
            places_client = Places()
            places_client.auth = self.auth
            place_shape = places_client.shape(place, geom='low')
            cutline = json.dumps(place_shape['geometry'])

        params = dict(
            keys=inputs,
            bands=bands,
            scales=scales,
            ot=data_type,
            of=output_format,
            srs=srs,
            resolution=resolution,
            shape=cutline,
            outputBounds=bounds,
            outputBoundsSRS=bounds_srs,
            outsize=dimensions,
            targetAlignedPixels=align_pixels,
            resampleAlg=resampler,
        )
        params.update(pass_through_params)

        if dltile is not None:
            # Accept either the bare key or a dltile dictionary.
            params['dltile'] = (
                dltile['properties']['key'] if isinstance(dltile, dict) else dltile
            )

        response = self.session.post('/raster', json=params)
        stream = BytesIO(response.content)

        # The payload starts with one JSON line describing the whole response;
        # its 'files' entry is the number of files that follow.
        result = json.loads(stream.readline().decode('utf-8').strip())
        file_count = result['files']
        files = result['files'] = {}

        for _ in range(file_count):
            # Each file is a JSON header line followed by 'length' raw bytes.
            header = json.loads(stream.readline().decode('utf-8').strip())
            name = header['name']
            payload = stream.read(header['length'])

            if outfile_basename:
                # Keep everything after the first dot of the server's file
                # name as the extension of the caller-chosen base name.
                extension = ".".join(os.path.basename(name).split(".")[1:])
                name = "{}.{}".format(outfile_basename, extension)

            files[name] = payload

        if save:
            for filename, data in six.iteritems(files):
                with open(filename, "wb") as f:
                    f.write(data)

        return DotDict(result)
Exemple #46
0
class HttpRequest(object):
    """A basic HTTP request.

    Holds the GET/POST/COOKIES/META/FILES mappings of a request and exposes a
    file-like interface (read, readline, iteration) on top of ``self._stream``.
    """

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level default. It is only ever rebound per instance (by
    # _initialize_handlers() or the upload_handlers setter), never mutated
    # in place by this class.
    _upload_handlers = []

    def __init__(self):
        """Create an empty request with blank mappings, path and method."""
        self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
        self.path = ''
        self.path_info = ''
        self.method = None
        self._post_parse_error = False

    def __repr__(self):
        """Return the representation produced by build_request_repr()."""
        return build_request_repr(self)

    def get_host(self):
        """Returns the HTTP host using the environment or request headers."""
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and (
            'HTTP_X_FORWARDED_HOST' in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = str(self.META['SERVER_PORT'])
            # "cond and a or b" idiom: the default port is '443' when the
            # request is secure, '80' otherwise. Only a non-default port is
            # appended to the host.
            if server_port != (self.is_secure() and '443' or '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_full_path(self):
        """Return the path, followed by '?' and the query string if one is set."""
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s' % (self.path, self.META.get('QUERY_STRING', '') and ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) or '')

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Attempts to return a signed cookie. If the signature fails or the
        cookie has expired, raises an exception... unless you provide the
        default argument in which case that value will be returned instead.
        """
        try:
            cookie_value = self.COOKIES[key].encode('utf-8')
        except KeyError:
            # Missing cookie: fall back to the default only if one was given.
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer's salt is the cookie name concatenated with the
            # caller-supplied salt.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def build_absolute_uri(self, location=None):
        """
        Builds an absolute URI from the location and the variables available in
        this request. If no location is specified, the absolute URI is built on
        ``request.get_full_path()``.
        """
        if not location:
            location = self.get_full_path()
        if not absolute_http_url_re.match(location):
            # Relative location: resolve it against scheme://host/path of the
            # current request.
            current_uri = '%s://%s%s' % (self.is_secure() and 'https' or 'http',
                                         self.get_host(), self.path)
            location = urljoin(current_uri, location)
        return iri_to_uri(location)

    def _is_secure(self):
        """Default security check: True when the HTTPS environment variable is "on"."""
        return os.environ.get("HTTPS") == "on"

    def is_secure(self):
        """Return True if the request is considered secure.

        Raises ImproperlyConfigured when SECURE_PROXY_SSL_HEADER is set but
        cannot be unpacked into exactly two values.
        """
        # First, check the SECURE_PROXY_SSL_HEADER setting.
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured('The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.')
            if self.META.get(header, None) == value:
                return True

        # Failing that, fall back to _is_secure(), which is a hook for
        # subclasses to implement.
        return self._is_secure()

    def is_ajax(self):
        """Return True if the HTTP_X_REQUESTED_WITH header equals 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    def _set_encoding(self, val):
        """
        Sets the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, it is removed and recreated on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, '_get'):
            del self._get
        if hasattr(self, '_post'):
            del self._post

    def _get_encoding(self):
        """Return the encoding currently configured for GET/POST access."""
        return self._encoding

    encoding = property(_get_encoding, _set_encoding)

    def _initialize_handlers(self):
        """Load one upload handler per entry of settings.FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [uploadhandler.load_handler(handler, self)
                                 for handler in settings.FILE_UPLOAD_HANDLERS]

    def _set_upload_handlers(self, upload_handlers):
        """Replace the upload handlers; raises AttributeError once ``_files`` exists."""
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def _get_upload_handlers(self):
        """Return the upload handlers, loading them from settings on first use."""
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    upload_handlers = property(_get_upload_handlers, _set_upload_handlers)

    def parse_file_upload(self, META, post_data):
        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list before parsing. The assignment goes through
        # the upload_handlers property setter above.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning = "You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """Return the raw request body, reading and caching it on first access.

        Raises Exception if the stream has already been read from before the
        body was cached, and UnreadablePostError if reading raises IOError.
        """
        if not hasattr(self, '_body'):
            # NOTE(review): _read_started is not assigned in __init__; within
            # this class it is only set by read()/readline(). Presumably a
            # subclass initialises it - confirm.
            if self._read_started:
                raise Exception("You cannot access body after reading from request's data stream")
            try:
                self._body = self.read()
            except IOError as e:
                # Python 2 three-argument raise: re-raise as
                # UnreadablePostError with the original traceback.
                raise UnreadablePostError, e, sys.exc_traceback
            # Point the stream at the cached bytes so later reads still work.
            self._stream = BytesIO(self._body)
        return self._body

    @property
    def raw_post_data(self):
        """Deprecated alias of ``body``; emits a DeprecationWarning."""
        warnings.warn('HttpRequest.raw_post_data has been deprecated. Use HttpRequest.body instead.', DeprecationWarning)
        return self.body

    def _mark_post_parse_error(self):
        """Install empty POST/FILES containers and flag that parsing failed."""
        self._post = QueryDict('')
        self._files = MultiValueDict()
        self._post_parse_error = True

    def _load_post_and_files(self):
        """Populate ``self._post`` and ``self._files`` from the request data."""
        # Populates self._post and self._files
        if self.method != 'POST':
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed without the body being cached, so the POST
        # data can no longer be parsed.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.META.get('CONTENT_TYPE', '').startswith('multipart'):
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                # Parse straight from this request's file-like interface.
                data = self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                # Mark that an error occurred. This allows self.__repr__ to
                # be explicit about it instead of simply representing an
                # empty POST
                self._mark_post_parse_error()
                raise
        else:
            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()

    ## File-like and iterator interface.
    ##
    ## Expects self._stream to be set to an appropriate source of bytes by
    ## a corresponding request subclass (e.g. WSGIRequest).
    ## Also when request data has already been read by request.POST or
    ## request.body, self._stream points to a BytesIO instance
    ## containing that data.

    def read(self, *args, **kwargs):
        """Read from the underlying stream and record that reading has started."""
        self._read_started = True
        return self._stream.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read one line from the underlying stream and record that reading has started."""
        self._read_started = True
        return self._stream.readline(*args, **kwargs)

    def xreadlines(self):
        """Yield lines from the stream until readline() returns an empty value."""
        while True:
            buf = self.readline()
            if not buf:
                break
            yield buf
    # Iterating over the request yields its lines.
    __iter__ = xreadlines

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(iter(self))
Exemple #47
0
class ReturnCodeToSubunit(object):
    """Expose a process's stdout as a read-only stream with its exit status appended.

    The object offers read, readline and readlines. Once the process's own
    output is exhausted, a non-zero exit status is turned into a synthetic
    failing test that is appended to the stream, so subunit consumers see the
    error. A process that closes stdout but never terminates makes reads from
    this stream hang.

    This class will be deleted at some point, allowing parsing to read from the
    actual fd and benefit from select for aggregating non-subunit output.
    """
    def __init__(self, process):
        """Wrap a process so it can be consumed as a readable stream.

        :param process: A subprocess.Popen object that is
            generating subunit.
        """
        self.proc = process
        self.done = False
        self.source = self.proc.stdout
        # Treat the (still empty) output as ending on a fresh line.
        self.lastoutput = LINEFEED

    def _append_return_code_as_test(self):
        """Switch to an in-memory source holding the synthetic failure, if any.

        Runs at most once; waits for the process to exit.
        """
        if self.done is True:
            return
        self.source = BytesIO()
        exit_status = self.proc.wait()
        if exit_status != 0:
            if self.lastoutput != LINEFEED:
                # Subunit V1 is line orientated and must start on a fresh
                # line. V2 must start on a utf8 character border, which an
                # arbitrary stream endpoint does not guarantee - emitting a
                # \n provides that guarantee.
                self.source.write(_b('\n'))
            if not v2_avail:
                legacy_report = ('test: process-returncode\n'
                                 'failure: process-returncode [\n'
                                 ' returncode %d\n'
                                 ']\n' % exit_status)
                self.source.write(_b(legacy_report))
            else:
                details = ('returncode %d' % exit_status).encode('utf8')
                reporter = subunit.StreamResultToBytes(self.source)
                reporter.status(test_id='process-returncode',
                                test_status='fail',
                                file_name='traceback',
                                mime_type='text/plain;charset=utf8',
                                file_bytes=details)
        # Rewind so the next read starts at the synthetic content.
        self.source.seek(0)
        self.done = True

    def read(self, count=-1):
        """Read up to ``count`` bytes, falling through to the synthetic test at EOF."""
        if count == 0:
            return _b('')
        chunk = self.source.read(count)
        if not chunk:
            self._append_return_code_as_test()
            return self.source.read(count)
        self.lastoutput = chunk[-1]
        return chunk

    def readline(self):
        """Read one line, falling through to the synthetic test at EOF."""
        line = self.source.readline()
        if not line:
            self._append_return_code_as_test()
            return self.source.readline()
        self.lastoutput = line[-1]
        return line

    def readlines(self):
        """Read all remaining lines, followed by those of the synthetic test."""
        lines = self.source.readlines()
        if lines:
            self.lastoutput = lines[-1][-1]
        self._append_return_code_as_test()
        lines.extend(self.source.readlines())
        return lines
Exemple #48
0
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse.

    The body is served from an in-memory ``BytesIO`` built from the recorded
    response, and the file-like methods below delegate to that buffer.
    """

    def __init__(self, recorded_response):
        """Build the stub from a recorded response.

        :param recorded_response: mapping with ``status`` (``message`` and
            ``code``), ``headers`` and ``body`` (``string``) entries. Note that
            a transfer-encoding header, if present, is removed from its
            ``headers`` mapping in place.
        """
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized, our
        # response is no longer chunked.  That means we don't want any
        # libraries trying to process a chunked response.  By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        te_key = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        if te_key:
            del headers[te_key[0]]
        self.headers = self.msg = parse_headers(headers)

        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        # In Python 3 the value of self.closed cannot be changed directly, so
        # self._closed is toggled instead and this property shadows the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        """Read from the recorded body."""
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return everything that is left of the recorded body."""
        # BytesIO has no readall() (that method belongs to io.RawIOBase), so
        # delegating to it always raised AttributeError. An argument-less
        # read() returns the remaining bytes up to EOF.
        return self._content.read()

    def readinto(self, *args, **kwargs):
        """Read recorded body bytes into a caller-supplied buffer."""
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read one line of the recorded body."""
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        """Read the remaining lines of the recorded body."""
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        return self._content.seekable()

    def tell(self):
        return self._content.tell()

    def isatty(self):
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the response closed; the in-memory body itself is left as is."""
        self._closed = True
        return True

    def getcode(self):
        """Return the recorded status code."""
        return self.status

    def isclosed(self):
        return self.closed

    def info(self):
        """Return the recorded headers, parsed afresh on every call."""
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        """Return the recorded headers as a list of header items."""
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """Return the value(s) of ``header`` joined by ", ", or ``default``.

        The header name is matched case-insensitively.
        """
        values = [v for (k, v) in self.getheaders() if k.lower() == header.lower()]

        if values:
            return ", ".join(values)
        else:
            return default

    def readable(self):
        return self._content.readable()
fStringIO.write('Hello')
fStringIO.write(' ')
fStringIO.write('World!')
# getvalue() returns the str that has been written so far.
print(fStringIO.getvalue())
# To read from a StringIO, initialise it with a str and then read it
# just like a file:
fStringIO1 = StringIO('Hello\nHi\nBybey')
while True:
    s = fStringIO1.readline()
    # readline() returns an empty string once the end is reached.
    if s == '':
        break
    print(s.strip())

# BytesIO
print('BytesIO========================================================')
# StringIO can only handle str; to work with binary data, use BytesIO.
# BytesIO reads and writes bytes in memory. Create a BytesIO and write
# some bytes to it:
from io import BytesIO

fByteIO = BytesIO()
# Note that what is written is not a str but UTF-8 encoded bytes.
fByteIO.write('哈喽'.encode('utf-8'))
print(fByteIO.getvalue())
# As with StringIO, a BytesIO can be initialised with bytes and then read
# just like a file:
fByteIO1 = BytesIO(b'\xe5\x93\x88\xe5\x96\xbd')
print(fByteIO1.readline())
# Summary

# StringIO and BytesIO operate on str and bytes in memory, offering the
# same interface as reading and writing files.

Exemple #50
0
    def parseResponse(self, rawheader, rawbody=None, type="curl"):
        """Parse a raw HTTP response into this object's protocol, code, headers and content.

        :param rawheader: raw header text of the response. Anything left after
            the header section is appended to the content via ``addContent``.
        :param rawbody: raw body bytes, or ``None`` when there is no separate
            body. When given, it is de-chunked / decompressed according to the
            parsed headers, decoded and stored as the content.
        :param type: origin of the response. For ``"curl"`` the
            ``Transfer-Encoding`` header is dropped before the body is handled,
            so no de-chunking takes place.
        """
        self.__content = ""
        self._headers = []

        tp = TextParser()
        tp.setSource("string", rawheader)

        tp.readUntil(r"(HTTP/[0-9.]+) ([0-9]+)")
        while True:
            # Skip over interim "100" status lines until a final status line
            # is found.
            while True:
                try:
                    self.protocol = tp[0][0]
                except Exception:
                    self.protocol = "unknown"

                try:
                    self.code = tp[0][1]
                except Exception:
                    self.code = "0"

                if self.code != "100":
                    break
                else:
                    tp.readUntil(r"(HTTP/[0-9.]+) ([0-9]+)")

            self.code = int(self.code)

            # Collect "Name: value" lines until the first line that is not a
            # header.
            while True:
                tp.readLine()
                if tp.search("^([^:]+): ?(.*)$"):
                    self.addHeader(tp[0][0], tp[0][1])
                else:
                    break

            # curl sometimes sends two headers when using follow, 302 and the final header
            # also when using proxies
            tp.readLine()
            if not tp.search(r"(HTTP/[0-9.]+) ([0-9]+)"):
                break
            else:
                # Another status line follows: keep only the last header set.
                self._headers = []

        # ignore CRLFs until request line
        while tp.lastline == "" and tp.readLine():
            pass

        # TODO: this should be added to rawbody not directly to __content
        if tp.lastFull_line:
            self.addContent(tp.lastFull_line)

        while tp.skip(1):
            self.addContent(tp.lastFull_line)

        if type == "curl":
            self.delHeader("Transfer-Encoding")

        if self.header_equal("Transfer-Encoding", "chunked"):
            # Each chunk is a hexadecimal size line, that many bytes of data
            # and a trailing line break; a size of zero ends the body.
            # The pieces are bytes, so they are gathered as bytes: adding them
            # to a str accumulator raises TypeError on Python 3.
            chunks = []
            content = BytesIO(rawbody)
            hexa = content.readline()
            nchunk = int(hexa.strip(), 16)

            while nchunk:
                chunks.append(content.read(nchunk))
                content.readline()
                hexa = content.readline()
                nchunk = int(hexa.strip(), 16)

            rawbody = b"".join(chunks)

        if self.header_equal("Content-Encoding", "gzip"):
            compressedstream = BytesIO(rawbody)
            gzipper = gzip.GzipFile(fileobj=compressedstream)
            rawbody = gzipper.read()
            self.delHeader("Content-Encoding")
        elif self.header_equal("Content-Encoding", "deflate"):
            deflated_data = None
            try:
                deflater = zlib.decompressobj()
                deflated_data = deflater.decompress(rawbody)
                deflated_data += deflater.flush()
            except zlib.error:
                try:
                    # Retry as a raw deflate stream (no zlib header).
                    deflater = zlib.decompressobj(-zlib.MAX_WBITS)
                    deflated_data = deflater.decompress(rawbody)
                    deflated_data += deflater.flush()
                except zlib.error:
                    # Undecodable payload: fall back to an empty body. It must
                    # be bytes because it is decoded below.
                    deflated_data = b""
            rawbody = deflated_data
            self.delHeader("Content-Encoding")

        if rawbody is not None:
            # Try to get charset encoding from headers
            content_encoding = get_encoding_from_headers(
                dict(self.getHeaders()))

            # fallback to default encoding
            if content_encoding is None:
                content_encoding = "utf-8"

            self.__content = python2_3_convert_from_unicode(
                rawbody.decode(content_encoding, errors="replace"))
Exemple #51
0
 def test_limit(self):
     """readline() honours its size limit even when no newline was reached."""
     reader = tcp.Reader(BytesIO(b"foobar\nfoobar"))
     first_piece = reader.readline(3)
     assert first_piece == b"foo"
Exemple #52
0
def test_dump():
    """Round-trip the reference data through dump_svmlight_file / load_svmlight_file.

    Every combination of X representation, y representation, index base and
    dtype is dumped to an in-memory file and loaded back; the header comments,
    the dtype, the index ordering and the values are then checked.
    """
    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # Indexing a csr_matrix by rows can leave its .indices unsorted, so the
    # sliced variants check that dumping sorts them correctly.
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    for X in (X_sparse, X_dense, X_sliced):
        for y in (y_sparse, y_dense, y_sliced):
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32, np.int64]:
                    f = BytesIO()

                    if sp.issparse(y) and y.shape[0] == 1:
                        # a sparse y must be shaped (n_samples, n_labels)
                        y = y.T

                    # Note: dtype=np.int32 makes this an unsafe cast in which
                    # X.astype(dtype) overflows. The outcome is platform
                    # dependent, so X_dense.astype(dtype) may differ from
                    # X_sparse.astype(dtype).toarray().
                    X_input = X.astype(dtype)

                    # A comment has to be passed for the version info to be
                    # written; LibSVM doesn't grok comments, so they are no
                    # longer emitted by default.
                    dump_svmlight_file(X_input, y, f, comment="test",
                                       zero_based=zero_based)
                    f.seek(0)

                    header = str(f.readline(), "utf-8")
                    assert "scikit-learn %s" % sklearn.__version__ in header

                    header = str(f.readline(), "utf-8")
                    assert ["one", "zero"][zero_based] + "-based" in header

                    X2, y2 = load_svmlight_file(f, dtype=dtype,
                                                zero_based=zero_based)
                    assert X2.dtype == dtype
                    assert_array_equal(X2.sorted_indices().indices, X2.indices)

                    X2_dense = X2.toarray()
                    X_input_dense = (X_input.toarray() if sp.issparse(X_input)
                                     else X_input)

                    # allow a rounding error at the last decimal place
                    precision = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(X_input_dense, X2_dense,
                                              precision)
                    assert_array_almost_equal(
                        y_dense.astype(dtype, copy=False), y2, precision)
Exemple #53
0
class InputFile(object):
    """Seekable, file-like view of request body data read from another file.

    Every byte pulled from the underlying file is also written to an internal
    buffer, so earlier data can be re-read after seeking backwards.
    ``_file_position`` counts the bytes consumed from the underlying file so
    far; the buffer's own position is the current read position.
    """

    # Bodies longer than this many bytes are buffered in a temporary file
    # instead of in memory.
    max_buffer_size = 1024*1024

    def __init__(self, rfile, length):
        """File-like object used to provide a seekable view of request body data

        :param rfile: file object the body is read from.
        :param length: total number of body bytes; reads never go past it.
        """
        self._file = rfile
        self.length = length

        self._file_position = 0

        if length > self.max_buffer_size:
            self._buf = tempfile.TemporaryFile()
        else:
            self._buf = BytesIO()

    @property
    def _buf_position(self):
        """Current read position; never ahead of what was read from the file."""
        rv = self._buf.tell()
        assert rv <= self._file_position
        return rv

    def read(self, bytes=-1):
        """Read up to ``bytes`` bytes (everything that is left when negative).

        Data already buffered is served from the buffer first; only the rest
        is fetched from the underlying file.
        """
        assert self._buf_position <= self._file_position

        if bytes < 0:
            bytes = self.length - self._buf_position
        bytes_remaining = min(bytes, self.length - self._buf_position)

        if bytes_remaining == 0:
            return b""

        if self._buf_position != self._file_position:
            # Positioned inside already-buffered data: replay that first.
            buf_bytes = min(bytes_remaining, self._file_position - self._buf_position)
            old_data = self._buf.read(buf_bytes)
            bytes_remaining -= buf_bytes
        else:
            old_data = b""

        assert bytes_remaining == 0 or self._buf_position == self._file_position, (
            "Before reading buffer position (%i) didn't match file position (%i)" %
            (self._buf_position, self._file_position))
        new_data = self._file.read(bytes_remaining)
        self._buf.write(new_data)
        # Advance by what was actually received: the underlying file may
        # return fewer bytes than requested, and counting the requested amount
        # would desynchronise the buffer and file positions.
        self._file_position += len(new_data)
        assert bytes_remaining == 0 or self._buf_position == self._file_position, (
            "After reading buffer position (%i) didn't match file position (%i)" %
            (self._buf_position, self._file_position))

        return old_data + new_data

    def tell(self):
        """Return the current read position."""
        return self._buf_position

    def seek(self, offset):
        """Move the read position to the absolute ``offset``.

        :raises ValueError: if ``offset`` is negative or beyond ``length``.
        """
        if offset > self.length or offset < 0:
            raise ValueError
        if offset <= self._file_position:
            self._buf.seek(offset)
        else:
            # read() starts at the current buffer position, which may be
            # behind the file position after an earlier backwards seek, so the
            # distance to cover is measured from the buffer position.
            self.read(offset - self._buf_position)

    def readline(self, max_bytes=None):
        """Read up to and including the next newline, or at most ``max_bytes`` bytes."""
        if max_bytes is None:
            max_bytes = self.length - self._buf_position

        if self._buf_position < self._file_position:
            # Try to satisfy the request from buffered data alone.
            data = self._buf.readline(max_bytes)
            if data.endswith(b"\n") or len(data) == max_bytes:
                return data
        else:
            data = b""

        assert self._buf_position == self._file_position

        initial_position = self._file_position
        found = False
        buf = []
        max_bytes -= len(data)
        while not found:
            # Pull data in small steps and stop at the first newline.
            readahead = self.read(min(2, max_bytes))
            max_bytes -= len(readahead)
            for i, c in enumerate(readahead):
                if c == b"\n"[0]:
                    buf.append(readahead[:i+1])
                    found = True
                    break
            if not found:
                buf.append(readahead)
            if not readahead or not max_bytes:
                break
        new_data = b"".join(buf)
        data += new_data
        # Anything read past the newline stays buffered; step back so the
        # next read starts right after the returned line.
        self.seek(initial_position + len(new_data))
        return data

    def readlines(self):
        """Return all remaining lines as a list."""
        rv = []
        while True:
            data = self.readline()
            if data:
                rv.append(data)
            else:
                break
        return rv

    def __next__(self):
        """Return the next line, raising StopIteration at the end of the data."""
        data = self.readline()
        if data:
            return data
        else:
            raise StopIteration

    # Python 2 iterator protocol name.
    next = __next__

    def __iter__(self):
        return self
Exemple #54
0
class ParseRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory and expand its body template.

    The request line and headers are parsed with the BaseHTTPRequestHandler
    machinery (no socket involved). The body is kept in ``req_body`` and may
    contain ``{{...}}`` placeholders that ``eval_and_replace`` expands.
    Parse failures are recorded in ``error_code`` / ``error_message``.
    """

    def __init__(self, request_text):
        """Parse ``request_text`` (str or bytes) as a raw HTTP request."""
        if isinstance(request_text, str):
            request_text = request_text.encode()
        assert isinstance(request_text, bytes) is True
        self.rfile = BytesIO(request_text)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()
        self.req_body = self._extract_body(request_text)

    @staticmethod
    def _extract_body(request_text):
        """Return everything after the first blank line, or b'' if there is none."""
        # The body starts after the blank line that ends the headers. Taking
        # only the last line of the request would truncate a multi-line body
        # and would mistake a header for the body when there is no body.
        candidates = []
        for separator in (b'\r\n\r\n', b'\n\n'):
            index = request_text.find(separator)
            if index != -1:
                candidates.append((index, len(separator)))
        if not candidates:
            return b''
        index, width = min(candidates)
        return request_text[index + width:]

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of writing an HTTP error response.

        The signature mirrors ``BaseHTTPRequestHandler.send_error`` because
        ``parse_request`` may pass the optional ``explain`` argument.
        """
        self.error_code = code
        self.error_message = message

    def _replace_b(self, old, new):
        """Replace the first ``{{...}}`` placeholder in ``old`` with ``new``."""
        if isinstance(old, bytes):
            pre = old[:old.find(b'{{')]
            end = old[old.find(b'}}') + 2:]
        else:
            pre = old[:old.find('{{')]
            end = old[old.find('}}') + 2:]
        return pre + new + end

    def _replace_c(self, old, new):
        """Replace the first ``[[...]]`` placeholder in ``old`` with ``new``."""
        if isinstance(old, bytes):
            pre = old[:old.find(b'[[')]
            end = old[old.find(b']]') + 2:]
        else:
            pre = old[:old.find('[[')]
            end = old[old.find(']]') + 2:]
        return pre + new + end

    def _get_c(self, old):
        """Yield, for each ``[[path]]`` naming an existing file, the repr of its stripped lines."""
        for i in re.findall(r'\[\[(.*)\]\]', old):
            if os.path.exists(i.strip()):
                w = []
                with open(i.strip()) as fp:
                    for l in fp:
                        w.append(l.strip())
                yield str(w).strip()

    def _eval_option(self, one):
        """Turn the text of one ``{{...}}`` placeholder into a list of values.

        An existing file path yields the file's stripped lines. Otherwise the
        text is evaluated as a Python expression (after inlining ``[[file]]``
        references) and used if it is a list; if evaluation fails, the text is
        split on whitespace.
        """
        batch_words = []
        if os.path.exists(one.strip()):
            gprint("load file form : %s" % one)
            with open(one.strip()) as fp:
                for l in fp:
                    o = l.strip()
                    batch_words.append(o)
        else:
            try:
                if '[[' in one and ']]' in one:
                    tone = one
                    for d in self._get_c(tone):
                        one = self._replace_c(one, d)

                gprint("try parse from python code:\n %s" %
                       colored(one, 'blue'))
                # SECURITY NOTE(review): eval() executes arbitrary code taken
                # from the request template. Only use this with templates from
                # a trusted source.
                w = eval(one)
                if isinstance(w, list):
                    batch_words = w
            except Exception as e:
                rprint(str(e))
                gprint("only as words")
                batch_words = one.split()
        return batch_words

    def _gen_map(self, data, now_da=None):
        """Yield every combination that takes one item from each list in ``data``.

        :param data: non-empty list of iterables.
        :param now_da: items already chosen for the combination being built.
        """
        prefix = [] if now_da is None else now_da
        if isinstance(data, list) and len(data) > 1:
            for i in data[0]:
                yield from self._gen_map(data[1:], prefix + [i])
        else:
            for i in data[0]:
                yield prefix + [i]

    def eval_and_replace(self):
        """Yield ``(body, values)`` for every combination of placeholder values.

        ``body`` is the request body text with each ``{{...}}`` placeholder
        replaced, in order, by the matching entry of ``values``.
        """
        old = self.req_body.decode('utf8', 'ignore')
        gprint(old)
        options = re.findall(r'\{\{(.+?)\}\}', old)
        eval_res = [self._eval_option(op) for op in options]
        if eval_res:
            for w in self._gen_map(eval_res):
                body_old = old
                for i in w:
                    # Evaluated lists may hold non-string items (e.g. ints),
                    # which cannot be concatenated into the text body as is.
                    body_old = self._replace_b(
                        body_old, i if isinstance(i, str) else str(i))
                yield body_old, w
def test_dump():
    """Round-trip the sample data through dump_svmlight_file / load_svmlight_file.

    Every combination of X representation (sparse, dense, row-sliced
    sparse), y representation, index base and dtype is dumped to an
    in-memory file, its two header comment lines are checked, and the
    reloaded data is compared with the dense originals.
    """

    def read_text_line(stream):
        # Decode the next line as UTF-8; on Python 2 str() takes no
        # encoding argument, so the raw line is returned instead.
        raw = stream.readline()
        try:
            return str(raw, "utf-8")
        except TypeError:  # fails in Python 2.x
            return raw

    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    for X in (X_sparse, X_dense, X_sliced):
        for y in (y_sparse, y_dense, y_sliced):
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32]:
                    if sp.issparse(y) and y.shape[0] == 1:
                        # make sure y's shape is: (n_samples, n_labels)
                        # when it is sparse
                        y = y.T

                    # we need to pass a comment to get the version info in;
                    # LibSVM doesn't grok comments so they're not put in by
                    # default anymore.
                    f = BytesIO()
                    dump_svmlight_file(X.astype(dtype), y, f,
                                       comment="test",
                                       zero_based=zero_based)
                    f.seek(0)

                    # First header line: producing library and version.
                    version_line = read_text_line(f)
                    assert_in("scikit-learn %s" % sklearn.__version__,
                              version_line)

                    # Second header line: index base of the dump.
                    base_line = read_text_line(f)
                    base_name = "zero" if zero_based else "one"
                    assert_in(base_name + "-based", base_line)

                    X2, y2 = load_svmlight_file(f, dtype=dtype,
                                                zero_based=zero_based)
                    assert_equal(X2.dtype, dtype)
                    assert_array_equal(X2.sorted_indices().indices, X2.indices)

                    X2_dense = X2.toarray()

                    # allow a rounding error at the last decimal place
                    decimals = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(X_dense.astype(dtype),
                                              X2_dense, decimals)
                    assert_array_almost_equal(y_dense.astype(dtype), y2,
                                              decimals)
Exemple #56
0
        def parseResponse(self, rawResponse, type="curl"):
            """Parse a raw HTTP response into protocol, status code, headers and body.

            Sets ``self.protocol``, ``self.code`` (as ``int``), the header
            list (through ``addHeader``) and the body (through
            ``addContent``). ``100`` interim responses are skipped, and when
            several header blocks are present (curl emits one per followed
            redirect) only the last block's headers are kept. A body marked
            ``Transfer-Encoding: chunked`` is de-chunked, and a body marked
            ``Content-Encoding: gzip`` is decompressed, after which that
            header is removed.

            :param rawResponse: full response text handed to ``TextParser``.
            :param type: origin of the response; for ``'curl'`` the
                ``Transfer-Encoding`` header is dropped before body
                processing (presumably because curl already de-chunks the
                body -- confirm).
            """
            status_line = r"(HTTP\S*) ([0-9]+)"

            self.__content = ""
            self._headers = []

            tp = TextParser()
            tp.setSource("string", rawResponse)

            tp.readUntil(status_line)
            while True:
                # Read the status line, skipping over "100" interim responses.
                while True:
                    try:
                        self.protocol = tp[0][0]
                    except Exception:
                        self.protocol = "unknown"

                    try:
                        self.code = tp[0][1]
                    except Exception:
                        self.code = "0"

                    if self.code != "100":
                        break
                    tp.readUntil(status_line)

                self.code = int(self.code)

                # Header lines run until the first line that is not "Name: value".
                while True:
                    tp.readLine()
                    if tp.search(r"^([^:]+): ?(.*)$"):
                        self.addHeader(tp[0][0], tp[0][1])
                    else:
                        break

                # curl sometimes sends two headers when using follow, 302 and the final header
                tp.readLine()
                if not tp.search(status_line):
                    break
                # Another status line follows: discard the headers collected
                # so far and parse the next block.
                self._headers = []

            while tp.skip(1):
                self.addContent(tp.lastFull_line)

            if type == 'curl':
                self.delHeader("Transfer-Encoding")

            # NOTE(review): BytesIO requires bytes, so both branches below
            # assume addContent accumulated a bytes body -- confirm.
            if self.header_equal("Transfer-Encoding", "chunked"):
                chunks = []
                content = BytesIO(self.__content)
                nchunk = int(content.readline().strip(), 16)

                while nchunk:
                    chunks.append(content.read(nchunk))
                    # Discard the remainder of the line after the chunk data.
                    content.readline()
                    nchunk = int(content.readline().strip(), 16)

                # Chunks read from a BytesIO are bytes; joining them avoids
                # the str + bytes TypeError of the previous accumulator.
                self.__content = b"".join(chunks)

            if self.header_equal("Content-Encoding", "gzip"):
                compressedstream = BytesIO(self.__content)
                # The stream must be passed as ``fileobj``; the first
                # positional parameter of GzipFile is a filename.
                with gzip.GzipFile(fileobj=compressedstream) as gzipper:
                    self.__content = gzipper.read()
                self.delHeader("Content-Encoding")
Exemple #57
0
class BufferedReader(object):
    """
    A wrapping line reader which wraps an existing reader.
    Read operations operate on an underlying buffer, which is refilled
    from the wrapped stream in reads of block_size bytes
    (BUFF_SIZE by default).

    If an optional decompress type is specified,
    data is fed through the decompressor when read from the buffer.
    Supported decompression types are the keys of DECOMPRESSORS:
    'gzip', 'deflate' and 'deflate_alt'.
    If unspecified, default decompression is None

    If decompression is specified, and decompress fails on first try,
    data is assumed to not be compressed and no exception is thrown
    (for 'deflate', the 'deflate_alt' decompressor is tried first).

    If a failure occurs after data has been partially decompressed,
    the error is printed and the failing block is treated as empty;
    the exception is not propagated.

    """

    # Maps a lower-case decompression type to a factory that returns a new
    # decompressor object.
    DECOMPRESSORS = {
        'gzip': gzip_decompressor,
        'deflate': deflate_decompressor,
        'deflate_alt': deflate_decompressor_alt
    }

    def __init__(self,
                 stream,
                 block_size=BUFF_SIZE,
                 decomp_type=None,
                 starting_data=None):
        """
        :param stream: wrapped reader; its read(n) is used to refill the
            buffer and its close() is called by close()
        :param block_size: number of bytes requested from the stream per
            refill
        :param decomp_type: key of DECOMPRESSORS (case-insensitive), or a
            false value for no decompression
        :param starting_data: optional data that is processed first,
            before anything is read from the stream
        """
        self.stream = stream
        self.block_size = block_size

        self._init_decomp(decomp_type)

        # BytesIO over the current (decompressed) block, or None
        self.buff = None
        self.starting_data = starting_data
        # total number of (decompressed) bytes buffered so far
        self.num_read = 0
        # length of the block currently held in self.buff
        self.buff_size = 0

    def set_decomp(self, decomp_type):
        """Replace the decompressor with a new one of the given type."""
        self._init_decomp(decomp_type)

    def _init_decomp(self, decomp_type):
        """
        Create a fresh decompressor for decomp_type (or none if it is
        false) and reset the per-decompressor byte counter.

        Raises Exception if decomp_type is not a key of DECOMPRESSORS.
        """
        # bytes produced by the current decompressor; 0 means "first try"
        self.num_block_read = 0
        if decomp_type:
            try:
                self.decomp_type = decomp_type
                self.decompressor = self.DECOMPRESSORS[decomp_type.lower()]()
            except KeyError:
                raise Exception('Decompression type not supported: ' +
                                decomp_type)
        else:
            self.decomp_type = None
            self.decompressor = None

    def _fillbuff(self, block_size=None):
        """
        Refill the buffer with the next block if it is exhausted.

        Does nothing while buffered data remains, or while the
        decompressor still holds unused data (see read_next_member).
        starting_data, if set, is consumed before the stream is read.
        """
        if not self.empty():
            return

        # can't read past next member
        if self.rem_length() > 0:
            return

        block_size = block_size or self.block_size

        if self.starting_data:
            data = self.starting_data
            self.starting_data = None
        else:
            data = self.stream.read(block_size)

        self._process_read(data)

        # if raw data is not empty and decompressor set, but
        # decompressed buff is empty, keep reading --
        # decompressor likely needs more data to decompress
        while data and self.decompressor and not self.decompressor.unused_data and self.empty(
        ):
            data = self.stream.read(block_size)
            self._process_read(data)

    def _process_read(self, data):
        """
        Decompress a raw block and install it as the current buffer,
        updating the byte counters. Empty input clears the buffer.
        """
        # don't process if no raw data read
        if not data:
            self.buff = None
            return

        data = self._decompress(data)
        self.buff_size = len(data)
        self.num_read += self.buff_size
        self.num_block_read += self.buff_size
        self.buff = BytesIO(data)

    def _decompress(self, data):
        """
        Return data passed through the decompressor, if one is set.

        On a decompression error before any output was produced, fall
        back: 'deflate' retries with 'deflate_alt', anything else
        disables decompression and returns the data unchanged. On a
        later error, print it and return b''.
        """
        if self.decompressor and data:
            try:
                data = self.decompressor.decompress(data)
            except Exception as e:
                # if first read attempt, assume non-gzipped stream
                if self.num_block_read == 0:
                    if self.decomp_type == 'deflate':
                        self._init_decomp('deflate_alt')
                        data = self._decompress(data)
                    else:
                        self.decompressor = None
                # otherwise (partly decompressed), something is wrong
                else:
                    print(str(e))
                    return b''
        return data

    def read(self, length=None):
        """
        Fill bytes and read some number of bytes
        (up to length if specified)
        <= length bytes may be read if reached the end of input
        if at buffer boundary, will attempt to read again until
        specified length is read
        """
        all_buffs = []
        while length is None or length > 0:
            self._fillbuff()
            if self.empty():
                break

            buff = self.buff.read(length)
            all_buffs.append(buff)
            if length:
                length -= len(buff)

        return b''.join(all_buffs)

    def readline(self, length=None):
        """
        Fill buffer and read a full line from the buffer
        (up to specified length, if provided)
        If no newline found at end, try filling buffer again in case
        at buffer boundary.
        """
        if length == 0:
            return b''

        self._fillbuff()

        if self.empty():
            return b''

        linebuff = self.buff.readline(length)

        # we may be at a boundary
        while not linebuff.endswith(b'\n'):
            if length:
                # only the remaining allowance may still be read
                length -= len(linebuff)
                if length <= 0:
                    break

            self._fillbuff()

            if self.empty():
                break

            linebuff += self.buff.readline(length)

        return linebuff

    def empty(self):
        """Return True if there is no buffer or it has been fully consumed."""
        return not self.buff or self.buff.tell() >= self.buff_size

    def read_next_member(self):
        """
        If the decompressor holds unused data, queue it as starting data
        for a fresh decompressor of the same type and return True;
        otherwise return False.
        """
        if not self.decompressor or not self.decompressor.unused_data:
            return False

        self.starting_data = self.decompressor.unused_data
        self._init_decomp(self.decomp_type)
        return True

    def rem_length(self):
        """
        Return the number of unread bytes in the buffer plus the length
        of the decompressor's unused data.
        """
        rem = 0
        if self.buff:
            rem = self.buff_size - self.buff.tell()

        if self.decompressor and self.decompressor.unused_data:
            rem += len(self.decompressor.unused_data)
        return rem

    def close(self):
        """Close the wrapped stream, if any, and drop the reference to it."""
        if self.stream:
            self.stream.close()
            self.stream = None

    @classmethod
    def get_supported_decompressors(cls):
        """Return the keys of DECOMPRESSORS."""
        return cls.DECOMPRESSORS.keys()
Exemple #58
0
from io import BytesIO, StringIO

# BytesIO and StringIO are both classes of the io module: each sets up an
# in-memory buffer (binary mode and text mode respectively) that can be
# used just like a file object.
# The buffer is released when close() is called; here that happens on
# leaving each ``with`` block.
# getvalue() returns the whole content, regardless of the file position.
# Why use StringIO: disk operations are generally much slower than memory
# operations, so when memory is sufficient the usual optimisation is to
# avoid touching the disk and cut down on disk I/O, which can greatly
# improve the running speed of a program.

# Binary buffer
with BytesIO() as bio:
    print(bio.readable(), bio.writable(), bio.seekable())
    bio.write(b'magede\nPython')
    bio.seek(0)
    print(bio.readline())
    print(bio.getvalue())

# Text buffer
with StringIO() as sio:
    print(sio.readable(), sio.writable(), sio.seekable())
    sio.write('magedu\nPython')
    sio.seek(0)
    print(sio.readline())
    print(sio.getvalue())

# File-like objects: objects that can be operated on like a file object
from sys import stdout

f = stdout
print(type(f))
f.write('magedu.com')  # written to the console
Exemple #59
0
class HttpRequest:
    """A basic HTTP request."""

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level default; replaced per instance by _initialize_handlers()
    # or the upload_handlers setter.
    _upload_handlers = []

    def __init__(self):
        """Create an empty request with no method, path or data."""
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        """
        Return '<ClassName: METHOD path>', or just '<ClassName>' when the
        method is unset or the full path is empty.
        """
        if self.method is None or not self.get_full_path():
            return '<%s>' % self.__class__.__name__
        return '<%s: %s %r>' % (self.__class__.__name__, self.method,
                                self.get_full_path())

    @cached_property
    def headers(self):
        """Return an HttpHeaders object built from self.META."""
        return HttpHeaders(self.META)

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and ('HTTP_X_FORWARDED_HOST'
                                              in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = self.get_port()
            # Only append the port when it is not the scheme's default.
            if server_port != ('443' if self.is_secure() else '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """
        Return the HTTP host using the environment or request headers.

        Raise DisallowedHost if the host's domain is empty or is not
        accepted by validate_host() for the allowed hosts.
        """
        host = self._get_raw_host()

        # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True.
        allowed_hosts = settings.ALLOWED_HOSTS
        if settings.DEBUG and not allowed_hosts:
            allowed_hosts = ['localhost', '127.0.0.1', '[::1]']

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, allowed_hosts):
            return host
        else:
            msg = "Invalid HTTP_HOST header: %r." % host
            if domain:
                msg += " You may need to add %r to ALLOWED_HOSTS." % domain
            else:
                msg += " The domain name provided is not valid according to RFC 1034/1035."
            raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
            port = self.META['HTTP_X_FORWARDED_PORT']
        else:
            port = self.META['SERVER_PORT']
        return str(port)

    def get_full_path(self, force_append_slash=False):
        """
        Return self.path, plus the query string if there is one. With
        force_append_slash, a '/' is added to a path lacking one.
        """
        return self._get_full_path(self.path, force_append_slash)

    def get_full_path_info(self, force_append_slash=False):
        """Like get_full_path(), but based on self.path_info."""
        return self._get_full_path(self.path_info, force_append_slash)

    def _get_full_path(self, path, force_append_slash):
        """
        Return the escaped path, an optional trailing '/', and
        '?<query string>' when META['QUERY_STRING'] is non-empty.
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s%s' % (escape_uri_path(path), '/' if force_append_slash
                           and not path.endswith('/') else '',
                           ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))
                            ) if self.META.get('QUERY_STRING', '') else '')

    def get_signed_cookie(self,
                          key,
                          default=RAISE_ERROR,
                          salt='',
                          max_age=None):
        """
        Attempt to return a signed cookie. If the signature fails or the
        cookie has expired, raise an exception, unless the `default` argument
        is provided,  in which case return that value.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer's salt combines the cookie name with the given salt.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        return '{scheme}://{host}{path}'.format(
            scheme=self.scheme,
            host=self._get_raw_host(),
            path=self.get_full_path(),
        )

    def build_absolute_uri(self, location=None):
        """
        Build an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, build the absolute URI
        using request.get_full_path(). If the location is absolute, convert it
        to an RFC 3987 compliant URI and return it. If location is relative or
        is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
        URL constructed from the request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        bits = urlsplit(location)
        if not (bits.scheme and bits.netloc):
            # Handle the simple, most common case. If the location is absolute
            # and a scheme or host (netloc) isn't provided, skip an expensive
            # urljoin() as long as no path segments are '.' or '..'.
            if (bits.path.startswith('/') and not bits.scheme
                    and not bits.netloc and '/./' not in bits.path
                    and '/../' not in bits.path):
                # If location starts with '//' but has no netloc, reuse the
                # schema and netloc from the current request. Strip the double
                # slashes and continue as if it wasn't specified.
                if location.startswith('//'):
                    location = location[2:]
                location = self._current_scheme_host + location
            else:
                # Join the constructed URL with the provided location, which
                # allows the provided location to apply query strings to the
                # base path.
                location = urljoin(self._current_scheme_host + self.path,
                                   location)
        return iri_to_uri(location)

    @cached_property
    def _current_scheme_host(self):
        """Return '<scheme>://<host>' using the validated get_host()."""
        return '{}://{}'.format(self.scheme, self.get_host())

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Return 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """
        Return 'https' or 'http' for this request.

        When settings.SECURE_PROXY_SSL_HEADER is set and the named header
        is present in META, the scheme is 'https' only if the header equals
        the configured secure value; otherwise _get_scheme() decides.
        Raise ImproperlyConfigured if the setting is not a two-item
        sequence.
        """
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, secure_value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            header_value = self.META.get(header)
            if header_value is not None:
                return 'https' if header_value == secure_value else 'http'
        return self._get_scheme()

    def is_secure(self):
        """Return True if the request scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """Return True if META's HTTP_X_REQUESTED_WITH is 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        """Return the encoding used for GET/POST data (None means default)."""
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Set the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, remove and recreate it on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, 'GET'):
            del self.GET
        if hasattr(self, '_post'):
            del self._post

    def _initialize_handlers(self):
        """Load one upload handler per entry of settings.FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [
            uploadhandler.load_handler(handler, self)
            for handler in settings.FILE_UPLOAD_HANDLERS
        ]

    @property
    def upload_handlers(self):
        """Return the upload handlers, loading them from settings if unset."""
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        """
        Replace the upload handlers. Raise AttributeError once the upload
        has been processed (i.e. self._files exists).
        """
        if hasattr(self, '_files'):
            raise AttributeError(
                "You cannot set the upload handlers after the upload has been processed."
            )
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Wrap the handlers in an ImmutableList carrying a warning message
        # for attempts to alter them from here on.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning=
            "You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers,
                                 self.encoding)
        return parser.parse()

    @property
    def body(self):
        """
        Return the raw request body, reading and caching it on first access.

        Raise RawPostDataException if the stream was already read from,
        RequestDataTooBig if CONTENT_LENGTH exceeds
        settings.DATA_UPLOAD_MAX_MEMORY_SIZE, and UnreadablePostError if
        reading fails with an IOError.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException(
                    "You cannot access body after reading from request's data stream"
                )

            # Limit the maximum request data size that will be handled in-memory.
            if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None
                    and int(self.META.get('CONTENT_LENGTH')
                            or 0) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
                raise RequestDataTooBig(
                    'Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.'
                )

            try:
                self._body = self.read()
            except IOError as e:
                raise UnreadablePostError(*e.args) from e
            # Point the stream at the cached bytes so read()/readline()
            # can still be used after the body has been consumed.
            self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Set empty POST data and files so parsing is not attempted again."""
        self._post = QueryDict()
        self._files = MultiValueDict()

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        if self.method != 'POST':
            self._post, self._files = QueryDict(
                encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed without the body being cached, so the
        # data is no longer available for parsing.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.content_type == 'multipart/form-data':
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                data = self
            try:
                self._post, self._files = self.parse_file_upload(
                    self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                self._mark_post_parse_error()
                raise
        elif self.content_type == 'application/x-www-form-urlencoded':
            self._post, self._files = QueryDict(
                self.body, encoding=self._encoding), MultiValueDict()
        else:
            self._post, self._files = QueryDict(
                encoding=self._encoding), MultiValueDict()

    def close(self):
        """Close every uploaded file held in self._files, if it exists."""
        if hasattr(self, '_files'):
            for f in chain.from_iterable(l[1] for l in self._files.lists()):
                f.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """
        Read from the underlying stream, marking the request as read.
        An IOError is re-raised as UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.read(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def readline(self, *args, **kwargs):
        """
        Read one line from the underlying stream, marking the request as
        read. An IOError is re-raised as UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.readline(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def __iter__(self):
        """Iterate over lines until readline() returns b''."""
        return iter(self.readline, b'')

    def xreadlines(self):
        """Deprecated: warn, then yield the same lines as iterating the request."""
        warnings.warn(
            'HttpRequest.xreadlines() is deprecated in favor of iterating the '
            'request.',
            RemovedInDjango30Warning,
            stacklevel=2,
        )
        yield from self

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(self)
Exemple #60
0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""a test script of BytesIO"""
__author__ = 'Jason Yang'

from io import BytesIO

#   write to BytesIO: each write() appends bytes to the in-memory buffer
f = BytesIO()
f.write(b'hello')
f.write(b' ')
f.write(b'world!')
#   getvalue(): b'hello world!'
print('getvalue():', f.getvalue())

#   read from BytesIO
#   The adjacent string literals below are joined into a single string
#   before .encode('utf-8') is applied, so the whole poem is encoded and
#   used as the initial content of the buffer.
f = BytesIO('临江仙 宋·苏轼\n'
            '夜饮东坡醒复醉,\n归来仿佛三更。\n家童鼻息已雷鸣。\n敲门都不应,\n倚杖听江声。\n\n'
            '长恨此身非我有,\n何时忘却营营。\n夜阑风静縠纹平。\n小舟从此逝,\n江海寄余生。'.encode('utf-8'))
#   Read line by line; readline() returns b'' once the buffer is exhausted.
while True:
    s = f.readline()
    if s == b'':
        break
    #   Drop the trailing newline and decode back to text for printing.
    print(s.strip().decode('utf-8'))