class packers_write_excel_xlwt(object):
    goal_time = 0.2

    def setup(self):
        self.f = '__test__.msg'

        def remove(f):
            try:
                os.remove(f)
            except OSError:
                pass

        self.N = 100000
        self.C = 5
        self.index = date_range('20000101', periods=self.N, freq='H')
        self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                                  for i in range(self.C)]),
                            index=self.index)
        self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                                   for i in range(self.C)]),
                             index=self.index)
        self.df2['object'] = [('%08x' % randrange(16 ** 8))
                              for _ in range(self.N)]
        remove(self.f)
        self.bio = BytesIO()

    def time_packers_write_excel_xlwt(self):
        self.bio.seek(0)
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlwt')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()
class Excel(object):
    goal_time = 0.2
    params = ['openpyxl', 'xlsxwriter', 'xlwt']
    param_names = ['engine']

    def setup(self, engine):
        N = 2000
        C = 5
        self.df = DataFrame(np.random.randn(N, C),
                            columns=['float{}'.format(i) for i in range(C)],
                            index=date_range('20000101', periods=N,
                                             freq='H'))
        self.df['object'] = tm.makeStringIndex(N)

        self.bio_read = BytesIO()
        self.writer_read = ExcelWriter(self.bio_read, engine=engine)
        self.df.to_excel(self.writer_read, sheet_name='Sheet1')
        self.writer_read.save()
        self.bio_read.seek(0)

        self.bio_write = BytesIO()
        self.bio_write.seek(0)
        self.writer_write = ExcelWriter(self.bio_write, engine=engine)

    def time_read_excel(self, engine):
        read_excel(self.bio_read)

    def time_write_excel(self, engine):
        self.df.to_excel(self.writer_write, sheet_name='Sheet1')
        self.writer_write.save()
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue()
class packers_write_excel_xlwt(object):
    goal_time = 0.2

    def setup(self):
        self.f = '__test__.msg'
        self.N = 100000
        self.C = 5
        self.index = date_range('20000101', periods=self.N, freq='H')
        self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                                  for i in range(self.C)]),
                            index=self.index)
        self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                                   for i in range(self.C)]),
                             index=self.index)
        self.df2['object'] = [('%08x' % randrange(16 ** 8))
                              for _ in range(self.N)]
        self.remove(self.f)
        self.bio = BytesIO()

    def time_packers_write_excel_xlwt(self):
        self.bio.seek(0)
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlwt')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()

    def remove(self, f):
        try:
            os.remove(f)
        except OSError:
            pass
def setup(self):
    self.f = '__test__.msg'

    def remove(f):
        try:
            os.remove(f)
        except OSError:
            pass

    self.N = 100000
    self.C = 5
    self.index = date_range('20000101', periods=self.N, freq='H')
    self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                              for i in range(self.C)]),
                        index=self.index)
    self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                               for i in range(self.C)]),
                         index=self.index)
    self.df2['object'] = [('%08x' % randrange(16 ** 8))
                          for _ in range(self.N)]
    remove(self.f)
    self.bio = BytesIO()
def write_graph(self, graph_object, graph_name='Graph', image_width=5.25):
    memfile = BytesIO()
    graph_object.get_figure().savefig(memfile)
    memfile.seek(0)  # rewind so the image bytes are read from the start
    self.document.add_paragraph(graph_name, style='List Bullet')
    self.document.add_picture(memfile, width=Inches(image_width))
    self.document.save(self.docname)
    memfile.close()
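A self-contained usage sketch for the write_graph helper above, assuming python-docx and matplotlib; the Report class, its docname attribute, and the sample data are hypothetical stand-ins, not part of the original snippet.

import matplotlib
matplotlib.use('Agg')  # headless backend; no display required
import matplotlib.pyplot as plt
from io import BytesIO
from docx import Document
from docx.shared import Inches


class Report(object):
    # hypothetical host class providing the attributes write_graph expects
    def __init__(self, docname):
        self.docname = docname
        self.document = Document()

    def write_graph(self, graph_object, graph_name='Graph',
                    image_width=5.25):
        memfile = BytesIO()
        graph_object.get_figure().savefig(memfile)
        memfile.seek(0)  # rewind before handing the buffer to python-docx
        self.document.add_paragraph(graph_name, style='List Bullet')
        self.document.add_picture(memfile, width=Inches(image_width))
        self.document.save(self.docname)
        memfile.close()


ax = plt.gca()  # any object with .get_figure() works, e.g. an Axes
ax.plot([1, 2, 3], [4, 5, 6])
Report('report.docx').write_graph(ax, graph_name='Example plot')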
class packers_read_excel(_Packers):

    def setup(self):
        self._setup()
        self.bio = BytesIO()
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()

    def time_packers_read_excel(self):
        self.bio.seek(0)
        pd.read_excel(self.bio)
def setup(self, engine):
    N = 2000
    C = 5
    self.df = DataFrame(np.random.randn(N, C),
                        columns=['float{}'.format(i) for i in range(C)],
                        index=date_range('20000101', periods=N, freq='H'))
    self.df['object'] = tm.makeStringIndex(N)

    self.bio_read = BytesIO()
    self.writer_read = ExcelWriter(self.bio_read, engine=engine)
    self.df.to_excel(self.writer_read, sheet_name='Sheet1')
    self.writer_read.save()
    self.bio_read.seek(0)
def test_stringio_writer(self):
    _skip_if_no_xlsxwriter()
    _skip_if_no_xlrd()

    path = BytesIO()
    with ExcelWriter(path, engine='xlsxwriter',
                     options={'in_memory': True}) as ew:
        self.frame.to_excel(ew, 'test1', engine='xlsxwriter')
        ew.save()
    path.seek(0)

    ef = ExcelFile(path)
    found_df = ef.parse('test1')
    tm.assert_frame_equal(self.frame, found_df)
    path.close()
def maybe_read_encoded_stream(reader, encoding=None, compression=None):
    """Read an encoded stream from the reader and transform the bytes to
    unicode if required based on the encoding.

    Parameters
    ----------
    reader : a streamable file-like object
    encoding : optional, the encoding to attempt to read
    compression : optional, the compression applied to the stream
        (e.g. 'gzip')

    Returns
    -------
    a tuple of (a stream of decoded bytes, the encoding which was used)
    """
    if compat.PY3 or encoding is not None:  # pragma: no cover
        if encoding:
            errors = 'strict'
        else:
            errors = 'replace'
            encoding = 'utf-8'
        if compression == 'gzip':
            reader = BytesIO(reader.read())
        else:
            reader = StringIO(reader.read().decode(encoding, errors))
    else:
        if compression == 'gzip':
            reader = BytesIO(reader.read())
        encoding = None
    return reader, encoding
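As a standalone illustration of the PY3, no-compression branch above: the raw bytes are read eagerly and re-wrapped as an in-memory text stream. This is a minimal sketch of the idea, not pandas API.

from io import BytesIO, StringIO

raw = BytesIO('a,b\n1,2\n'.encode('utf-8'))

# decode everything up front and hand back a text stream, mirroring
# StringIO(reader.read().decode(encoding, errors)) above
decoded = StringIO(raw.read().decode('utf-8', 'strict'))
print(decoded.read())  # a,b
                       # 1,2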
def test_variable_width_unicode():
    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected)
def test_variable_width_unicode(self):
    if not compat.PY3:
        raise nose.SkipTest(
            'Bytes-related test - only needs to work on Python 3')
    test = """
שלום שלום
ום   שלל
של   ום
""".strip('\r\n')
    expected = read_fwf(BytesIO(test.encode('utf8')),
                        colspecs=[(0, 4), (5, 9)],
                        header=None, encoding='utf8')
    tm.assert_frame_equal(expected, read_fwf(
        BytesIO(test.encode('utf8')), header=None, encoding='utf8'))
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
    # see gh-22748
    t = BytesIO(b"\xB0")
    t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
    msg = "'utf-8' codec can't encode character"

    with pytest.raises(UnicodeError, match=msg):
        c_parser_only.read_csv(t, encoding="UTF-8")
def test_buffer_rd_bytes_bad_unicode(self):
    # see gh-22748
    t = BytesIO(b"\xB0")
    if PY3:
        t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape')

    with pytest.raises(UnicodeError):
        self.read_csv(t, encoding='UTF-8')
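The two variants above hinge on a codec detail that is easy to check in isolation: decoding b'\xB0' as ASCII with errors='surrogateescape' produces a lone surrogate, which a strict UTF-8 encode then rejects. A minimal reproduction, independent of pandas:

# b'\xB0' is not valid ASCII; surrogateescape smuggles the byte through
# as the lone surrogate '\udcb0' instead of raising on decode
text = b'\xB0'.decode('ascii', errors='surrogateescape')
assert text == '\udcb0'

# re-encoding the surrogate fails, which is the UnicodeError the parser
# surfaces when told the input is UTF-8
try:
    text.encode('utf-8')
except UnicodeEncodeError as exc:
    print(exc)  # 'utf-8' codec can't encode character '\udcb0' ...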
def test_sniff_delimiter_encoding(python_parser_only, encoding):
    parser = python_parser_only
    data = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""

    if encoding is not None:
        data = u(data).encode(encoding)
        data = BytesIO(data)

        if compat.PY3:
            from io import TextIOWrapper
            data = TextIOWrapper(data, encoding=encoding)
    else:
        data = StringIO(data)

    result = parser.read_csv(data, index_col=0, sep=None,
                             skiprows=2, encoding=encoding)
    expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                         columns=["A", "B", "C"],
                         index=Index(["foo", "bar", "baz"], name="index"))
    tm.assert_frame_equal(result, expected)
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
    # are environment variables
    parsed_url = parse_url(filepath_or_buffer)
    s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

    try:
        conn = boto.connect_s3(host=s3_host)
    except boto.exception.NoAuthHandlerFound:
        conn = boto.connect_s3(host=s3_host, anon=True)

    b = conn.get_bucket(parsed_url.netloc, validate=False)
    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        k = boto.s3.key.Key(b, parsed_url.path)
        filepath_or_buffer = BytesIO(
            k.get_contents_as_string(encoding=encoding))
    else:
        k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
        k.open('r')  # Expose read errors immediately
        filepath_or_buffer = k
    return filepath_or_buffer, None, compression
def test_encode(self, html_encoding_file):
    _, encoding = os.path.splitext(
        os.path.basename(html_encoding_file))[0].split('_')

    try:
        with open(html_encoding_file, 'rb') as fobj:
            from_string = self.read_html(fobj.read(), encoding=encoding,
                                         index_col=0).pop()
        with open(html_encoding_file, 'rb') as fobj:
            from_file_like = self.read_html(BytesIO(fobj.read()),
                                            encoding=encoding,
                                            index_col=0).pop()
        from_filename = self.read_html(html_encoding_file,
                                       encoding=encoding,
                                       index_col=0).pop()
        tm.assert_frame_equal(from_string, from_file_like)
        tm.assert_frame_equal(from_string, from_filename)
    except Exception:
        # seems utf-16/32 fail on windows
        if is_platform_windows():
            if '16' in encoding or '32' in encoding:
                pytest.skip()
        raise
def setup(self, engine):
    N = 2000
    C = 5
    self.df = DataFrame(np.random.randn(N, C),
                        columns=['float{}'.format(i) for i in range(C)],
                        index=date_range('20000101', periods=N, freq='H'))
    self.df['object'] = tm.makeStringIndex(N)

    self.bio_read = BytesIO()
    self.writer_read = ExcelWriter(self.bio_read, engine=engine)
    self.df.to_excel(self.writer_read, sheet_name='Sheet1')
    self.writer_read.save()
    self.bio_read.seek(0)

    self.bio_write = BytesIO()
    self.bio_write.seek(0)
    self.writer_write = ExcelWriter(self.bio_write, engine=engine)
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer;
    passthrough otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or
                         pathlib.Path), or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                compression = 'gzip'
            else:
                compression = None
        # cat on the compression to the tuple returned by the function
        to_return = (list(maybe_read_encoded_stream(req, encoding,
                                                    compression)) +
                     [compression])
        return tuple(to_return)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)

        try:
            conn = boto.connect_s3()
        except boto.exception.NoAuthHandlerFound:
            conn = boto.connect_s3(anon=True)

        b = conn.get_bucket(parsed_url.netloc, validate=False)
        if compat.PY2 and (compression == 'gzip' or
                           (compression == 'infer' and
                            filepath_or_buffer.endswith(".gz"))):
            k = boto.s3.key.Key(b, parsed_url.path)
            filepath_or_buffer = BytesIO(k.get_contents_as_string(
                encoding=encoding))
        else:
            k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
            k.open('r')  # Expose read errors immediately
            filepath_or_buffer = k
        return filepath_or_buffer, None, compression

    # It is a pathlib.Path/py.path.local or string
    filepath_or_buffer = _stringify_path(filepath_or_buffer)
    return _expand_user(filepath_or_buffer), None, compression
def test_BytesIO_input(self):
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = BytesIO("שלום::1234\n562::123".encode('cp1255'))
    result = self.read_table(data, sep="::", encoding='cp1255')
    expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
    tm.assert_frame_equal(result, expected)
def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected)
def _read_zipfile(self, url):
    zipf = BytesIO(self._get_response(url).content)

    with ZipFile(zipf, 'r') as zf:
        data = zf.open(zf.namelist()[0]).read().decode()

    return data
def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB, S3FS uses 5MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    str_buf = StringIO()
    df.to_csv(str_buf)
    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(Key="large-file.csv",
                                                 Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert (0, 5505024) in {x.args[-2:] for x in caplog.records}
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or
                         pathlib.Path), or buffer
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, optional
    encoding : the encoding to use to decode bytes, default is 'utf-8'
    mode : str, optional

    Returns
    -------
    tuple of (a filepath or buffer or S3File instance,
              encoding : str,
              compression : str,
              should_close : bool)
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if _is_url(filepath_or_buffer):
        req = urlopen(filepath_or_buffer)
        content_encoding = req.headers.get('Content-Encoding', None)
        if content_encoding == 'gzip':
            # Override compression based on Content-Encoding header
            compression = 'gzip'
        reader = BytesIO(req.read())
        req.close()
        return reader, encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression,
                                         mode=mode)

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs
        return gcs.get_filepath_or_buffer(filepath_or_buffer,
                                          encoding=encoding,
                                          compression=compression,
                                          mode=mode)

    if isinstance(filepath_or_buffer, (compat.string_types,
                                       compat.binary_type,
                                       mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression, False

    if not is_file_like(filepath_or_buffer):
        msg = "Invalid file path or buffer object type: {_type}"
        raise ValueError(msg.format(_type=type(filepath_or_buffer)))

    return filepath_or_buffer, None, compression, False
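The URL branch above reduces to a reusable pattern: read the response fully into a BytesIO so downstream parsers get a seekable buffer, and derive a compression hint from the Content-Encoding header. A hedged standalone sketch (url_to_buffer is an illustrative name, not pandas API):

from io import BytesIO

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2 fallback


def url_to_buffer(url):
    """Fetch a URL into a seekable in-memory buffer, returning the buffer
    and a 'gzip' compression hint when the server flags the body as such."""
    req = urlopen(url)
    compression = ('gzip'
                   if req.headers.get('Content-Encoding') == 'gzip'
                   else None)
    buf = BytesIO(req.read())  # fully buffered, so .seek() works
    req.close()
    return buf, compression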
def test_BytesIO_input(self):
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding='utf8')
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected)
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr
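Together with _pickle_array earlier in this collection, this forms a bytes round trip. Here is a minimal sketch of the same idea using only NumPy's public save/load (write_array and read_array are pandas internals):

import numpy as np
from io import BytesIO


def pickle_array(arr):
    buf = BytesIO()
    np.save(buf, np.asarray(arr))  # serialize to in-memory bytes
    return buf.getvalue()


def unpickle_array(data):
    arr = np.load(BytesIO(data))
    # normalize datetime64 values to nanosecond resolution, as above
    if arr.dtype.kind == 'M':
        arr = arr.view('M8[ns]')
    return arr


roundtripped = unpickle_array(pickle_array(np.arange(5)))
assert (roundtripped == np.arange(5)).all()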
def fastmsgpack_loads(data):
    raw = list(msgpack_unpack(
        BytesIO(_l1(data)),
        object_hook=object_hook,
    ))
    # raw will always be a list, which is most likely a list containing
    # a single dataframe or series
    if len(raw) == 1:
        # we only serialized one structure, just return it
        return raw[0]
    return raw
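fastmsgpack_loads above relies on a msgpack stream being able to hold several top-level objects; unpacking yields them all, and singletons are unwrapped. The same stream behavior with msgpack-python directly (object_hook and _l1 are project-specific and omitted here):

import msgpack
from io import BytesIO

buf = BytesIO()
buf.write(msgpack.packb([1, 2, 3]))
buf.write(msgpack.packb({'a': 1}))
buf.seek(0)

# Unpacker yields each top-level object packed into the stream
raw = list(msgpack.Unpacker(buf, raw=False))
print(raw)  # [[1, 2, 3], {'a': 1}]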
def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding):
    # see gh-3404
    expected = DataFrame({"a": [1], "b": [2]})
    parser = python_parser_only

    data = "1" + sep + "2"
    encoded_data = data.encode(encoding)

    result = parser.read_csv(BytesIO(encoded_data), sep=sep,
                             names=["a", "b"], encoding=encoding)
    tm.assert_frame_equal(result, expected)
def test_utf16_example(self):
    path = tm.get_data_path('utf16_ex.txt')

    # it works! and is the right length
    result = self.read_table(path, encoding='utf-16')
    self.assertEqual(len(result), 50)

    if not compat.PY3:
        with open(path, 'rb') as f:
            buf = BytesIO(f.read())
        result = self.read_table(buf, encoding='utf-16')
        self.assertEqual(len(result), 50)
def setup(self):
    self.f = '__test__.msg'
    self.N = 100000
    self.C = 5
    self.index = date_range('20000101', periods=self.N, freq='H')
    self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                              for i in range(self.C)]),
                        index=self.index)
    self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                               for i in range(self.C)]),
                         index=self.index)
    self.df2['object'] = [('%08x' % randrange(16 ** 8))
                          for _ in range(self.N)]
    self.remove(self.f)
    self.bio = BytesIO()
    self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter')
    self.df[:2000].to_excel(self.writer)
    self.writer.save()
def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
    # see gh-16135

    s3_object = s3_resource.meta.client.get_object(Bucket='pandas-test',
                                                   Key='tips.csv')

    result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
    assert isinstance(result, DataFrame)
    assert not result.empty

    expected = read_csv(tips_file)
    tm.assert_frame_equal(result, expected)
def test_utf16_bom_skiprows(self):
    # #2298
    data = u("""skip this
skip this too
A\tB\tC
1\t2\t3
4\t5\t6""")

    data2 = u("""skip this
skip this too
A,B,C
1,2,3
4,5,6""")

    path = '__%s__.csv' % tm.rands(10)

    with tm.ensure_clean(path) as path:
        for sep, dat in [('\t', data), (',', data2)]:
            for enc in ['utf-16', 'utf-16le', 'utf-16be']:
                bytes = dat.encode(enc)
                with open(path, 'wb') as f:
                    f.write(bytes)

                s = BytesIO(dat.encode('utf-8'))
                if compat.PY3:
                    # somewhat False since the code never sees bytes
                    from io import TextIOWrapper
                    s = TextIOWrapper(s, encoding='utf-8')

                result = self.read_csv(path, encoding=enc, skiprows=2,
                                       sep=sep)
                expected = self.read_csv(s, encoding='utf-8', skiprows=2,
                                         sep=sep)
                s.close()
                tm.assert_frame_equal(result, expected)
def test_utf16_bom_skiprows(self):
    # #2298
    data = u("""skip this
skip this too
A\tB\tC
1\t2\t3
4\t5\t6""")

    data2 = u("""skip this
skip this too
A,B,C
1,2,3
4,5,6""")

    path = "__%s__.csv" % tm.rands(10)

    with tm.ensure_clean(path) as path:
        for sep, dat in [("\t", data), (",", data2)]:
            for enc in ["utf-16", "utf-16le", "utf-16be"]:
                bytes = dat.encode(enc)
                with open(path, "wb") as f:
                    f.write(bytes)

                s = BytesIO(dat.encode("utf-8"))
                if compat.PY3:
                    # somewhat False since the code never sees bytes
                    from io import TextIOWrapper
                    s = TextIOWrapper(s, encoding="utf-8")

                result = self.read_csv(path, encoding=enc, skiprows=2,
                                       sep=sep)
                expected = self.read_csv(s, encoding="utf-8", skiprows=2,
                                         sep=sep)
                s.close()
                tm.assert_frame_equal(result, expected)
def setup(self):
    self.f = '__test__.msg'
    self.N = 100000
    self.C = 5
    self.index = date_range('20000101', periods=self.N, freq='H')
    self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                              for i in range(self.C)]),
                        index=self.index)
    self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                               for i in range(self.C)]),
                         index=self.index)
    self.df2['object'] = [('%08x' % randrange(16 ** 8))
                          for _ in range(self.N)]
    self.remove(self.f)
    self.bio = BytesIO()
def test_streaming_s3_objects():
    # GH17135
    # botocore gained iteration support in 1.10.47, can now be used in read_*
    pytest.importorskip('botocore', minversion='1.10.47')
    from botocore.response import StreamingBody

    data = [
        b'foo,bar,baz\n1,2,3\n4,5,6\n',
        b'just,the,header\n',
    ]
    for el in data:
        body = StreamingBody(BytesIO(el), content_length=len(el))
        read_csv(body)
def setup(self):
    self.f = '__test__.msg'
    self.N = 100000
    self.C = 5
    self.index = date_range('20000101', periods=self.N, freq='H')
    self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                              for i in range(self.C)]),
                        index=self.index)
    self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N))
                               for i in range(self.C)]),
                         index=self.index)
    self.df2['object'] = [('%08x' % randrange(16 ** 8))
                          for _ in range(self.N)]
    self.remove(self.f)
    self.bio = BytesIO()
    self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter')
    self.df[:2000].to_excel(self.writer)
    self.writer.save()
class Excel(_Packers):

    def setup(self):
        self._setup()
        self.bio = BytesIO()

    def time_write_excel_openpyxl(self):
        self.bio.seek(0)
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='openpyxl')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()

    def time_write_excel_xlsxwriter(self):
        self.bio.seek(0)
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()

    def time_write_excel_xlwt(self):
        self.bio.seek(0)
        self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlwt')
        self.df[:2000].to_excel(self.writer)
        self.writer.save()
def time_write_excel(self, engine):
    bio_write = BytesIO()
    bio_write.seek(0)
    writer_write = ExcelWriter(bio_write, engine=engine)
    self.df.to_excel(writer_write, sheet_name='Sheet1')
    writer_write.save()
def setup(self):
    self._setup()
    self.bio = BytesIO()
def setup(self):
    self._setup()
    self.bio = BytesIO()
    self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter')
    self.df[:2000].to_excel(self.writer)
    self.writer.save()
def compserver(payload, serial):
    (allow_profiler,
     default_profiler_output,
     profile_by_default) = _get_profiler_info()
    requested_profiler_output = payload.get('profiler_output',
                                            default_profiler_output)
    profile = payload.get('profile')
    profiling = (allow_profiler and
                 (profile or (profile_by_default and
                              requested_profiler_output)))
    if profile and not allow_profiler:
        return ('profiling is disabled on this server', RC.FORBIDDEN)

    with ExitStack() as response_construction_context_stack:
        if profiling:
            from cProfile import Profile

            if (default_profiler_output == ':response' and
                    requested_profiler_output != ':response'):
                # writing to the local filesystem is disabled
                return ("local filepaths are disabled on this server, only"
                        " ':response' is allowed for the 'profiler_output'"
                        " field",
                        RC.FORBIDDEN)

            profiler_output = requested_profiler_output
            profiler = Profile()
            profiler.enable()
            # ensure that we stop profiling in the case of an exception
            response_construction_context_stack.callback(profiler.disable)

        expr = '<failed to parse expr>'

        @response_construction_context_stack.callback
        def log_time(start=time()):
            flask.current_app.logger.info('compute expr: %s\n'
                                          'total time (s): %.3f',
                                          expr,
                                          time() - start)

        ns = payload.get('namespace', {})
        compute_kwargs = payload.get('compute_kwargs') or {}
        odo_kwargs = payload.get('odo_kwargs') or {}
        dataset = _get_data()
        ns[':leaf'] = symbol('leaf', discover(dataset))

        expr = from_tree(payload['expr'], namespace=ns)
        assert len(expr._leaves()) == 1
        leaf = expr._leaves()[0]

        try:
            result = serial.materialize(compute(expr,
                                                {leaf: dataset},
                                                **compute_kwargs),
                                        expr.dshape,
                                        odo_kwargs)
        except NotImplementedError as e:
            return ("Computation not supported:\n%s" % e,
                    RC.NOT_IMPLEMENTED)
        except Exception as e:
            return ("Computation failed with message:\n%s: %s" %
                    (type(e).__name__, e),
                    RC.INTERNAL_SERVER_ERROR)

        response = {'datashape': pprint(expr.dshape, width=0),
                    'data': serial.data_dumps(result),
                    'names': expr.fields}

    if profiling:
        import marshal
        from pstats import Stats

        if profiler_output == ':response':
            from pandas.compat import BytesIO
            file = BytesIO()
        else:
            file = open(_prof_path(profiler_output, expr), 'wb')

        with file:
            # Use marshal to dump the stats data to the given file.
            # This is taken from cProfile which unfortunately does not have
            # an api that allows us to pass the file object directly, only
            # a file path.
            marshal.dump(Stats(profiler).stats, file)

        if profiler_output == ':response':
            response['profiler_output'] = {'__!bytes': file.getvalue()}

    return serial.dumps(response)
def compserver(payload, serial):
    expected_keys = {u'namespace',
                     u'odo_kwargs',
                     u'compute_kwargs',
                     u'expr',
                     u'profile',
                     u'profiler_output'}
    if not set(payload.keys()) <= expected_keys:
        return ('unexpected keys in payload: %r' %
                sorted(set(payload.keys()) - expected_keys),
                RC.BAD_REQUEST)

    app = flask.current_app
    (allow_profiler,
     default_profiler_output,
     profile_by_default) = _get_profiler_info()
    requested_profiler_output = payload.get(u'profiler_output',
                                            default_profiler_output)
    profile = payload.get(u'profile')
    profiling = (allow_profiler and
                 (profile or (profile_by_default and
                              requested_profiler_output)))
    if profile and not allow_profiler:
        return ('profiling is disabled on this server', RC.FORBIDDEN)

    with ExitStack() as response_construction_context_stack:
        if profiling:
            from cProfile import Profile

            if (default_profiler_output == ':response' and
                    requested_profiler_output != ':response'):
                # writing to the local filesystem is disabled
                return ("local filepaths are disabled on this server, only"
                        " ':response' is allowed for the 'profiler_output'"
                        " field",
                        RC.FORBIDDEN)

            profiler_output = requested_profiler_output
            profiler = Profile()
            profiler.enable()
            # ensure that we stop profiling in the case of an exception
            response_construction_context_stack.callback(profiler.disable)

        expr = '<failed to parse expr>'

        @response_construction_context_stack.callback
        def log_time(start=time()):
            app.logger.info('compute expr: %s\ntotal time (s): %.3f',
                            expr,
                            time() - start)

        ns = payload.get(u'namespace', {})
        compute_kwargs = payload.get(u'compute_kwargs') or {}
        odo_kwargs = payload.get(u'odo_kwargs') or {}
        dataset = _get_data()
        ns[':leaf'] = symbol('leaf', discover(dataset))

        expr = from_tree(payload[u'expr'], namespace=ns)
        if len(expr._leaves()) != 1:
            return ('too many leaves, expected 1 got %d' %
                    len(expr._leaves()),
                    RC.BAD_REQUEST)
        leaf = expr._leaves()[0]

        formatter = getattr(flask.current_app,
                            'log_exception_formatter',
                            _default_log_exception_formatter)
        try:
            result = serial.materialize(compute(expr,
                                                {leaf: dataset},
                                                **compute_kwargs),
                                        expr.dshape,
                                        odo_kwargs)
        except NotImplementedError as e:
            # Note: `sys.exc_info()[2]` holds the current traceback, for
            # Python 2 / 3 compatibility. It's important not to store a
            # local reference to it.
            formatted_tb = formatter(sys.exc_info()[2])
            error_msg = "Computation not supported:\n%s\n%s" % (e,
                                                                formatted_tb)
            app.logger.error(error_msg)
            return (error_msg, RC.NOT_IMPLEMENTED)
        except Exception as e:
            formatted_tb = formatter(sys.exc_info()[2])
            error_msg = ("Computation failed with message:\n%s: %s\n%s" %
                         (type(e).__name__, e, formatted_tb))
            app.logger.error(error_msg)
            return (error_msg, RC.INTERNAL_SERVER_ERROR)

        response = {u'datashape': pprint(expr.dshape, width=0),
                    u'data': serial.data_dumps(result),
                    u'names': expr.fields}

    if profiling:
        import marshal
        from pstats import Stats

        if profiler_output == ':response':
            from pandas.compat import BytesIO
            file = BytesIO()
        else:
            file = open(_prof_path(profiler_output, expr), 'wb')

        with file:
            # Use marshal to dump the stats data to the given file.
            # This is taken from cProfile which unfortunately does not have
            # an api that allows us to pass the file object directly, only
            # a file path.
            marshal.dump(Stats(profiler).stats, file)

        if profiler_output == ':response':
            response[u'profiler_output'] = {'__!bytes': file.getvalue()}

    return serial.dumps(response)