def __init__(self, strip_id, file_size, block_size, strip_length):
    self.strip_id = strip_id
    self.file_size = file_size
    self.block_size = block_size
    self.strip_length = strip_length
    self.finished = False
    strip_size = block_size * strip_length
    self.position = strip_id * strip_size
    if (file_size - self.position) < strip_size:  # last strip
        self.length = file_size - self.position
        if 0 == (self.length % self.block_size):
            self.block_count = self.length // self.block_size
        else:
            # floor-divide, then add one block for the remainder;
            # // keeps block_count an int under Python 3
            self.block_count = ((self.length - self.length % self.block_size)
                                // self.block_size) + 1
    else:
        self.length = strip_size
        self.block_count = strip_length
    self.mark = [False] * self.block_count
    self.stream = io.BytesIO(bytearray(self.length))
    self.writer = io.BufferedRandom(self.stream)
    self.content = ""
def prepare(self, cache_count):
    if not os.path.exists(self.filename):
        return False
    buffer_size = self.block_size * self.strip_length
    file_object = io.FileIO(self.filename, "r+")
    self.reader = io.BufferedRandom(file_object, buffer_size)
    self.cache_data = []
    self.strip_index = {}
    self.cache_count = cache_count
    for i in range(self.cache_count):
        self.cache_data.append([])
    # slice the file into strips (// keeps strip_count an int on Python 3)
    strip_size = self.block_size * self.strip_length
    remainder = self.file_size % strip_size
    if 0 == remainder:
        strip_count = self.file_size // strip_size
    else:
        strip_count = ((self.file_size - remainder) // strip_size) + 1
    # reverse the strip order
    for strip in range(strip_count):
        self.strips.append(strip)
    self.strips.reverse()
    # prepare all caches
    for cache_index in range(self.cache_count):
        if 0 != len(self.strips):
            self.loadStripToCache(cache_index)
    return True
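Both methods above hand-roll a ceiling division to turn a byte length into a block or strip count. A minimal, self-contained sketch of that arithmetic (the names here are illustrative, not from the source project):

def ceil_div(total, chunk):
    # integer ceiling division: equivalent to math.ceil(total / chunk)
    # for positive ints, without leaving integer arithmetic
    return -(-total // chunk)

assert ceil_div(10, 4) == 3  # 10 bytes in 4-byte blocks -> 3 blocks
assert ceil_div(8, 4) == 2   # exact multiple -> no extra block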
def test_write_fileslack_from_stdin(self, testfs_fat_stable1):
    """
    Test if writing from stdin into the slackspace of a given
    destination works.
    """
    teststring = "Small test for CLI"
    metadata_file = tempfile.NamedTemporaryFile().name
    expected = json.dumps(json.loads('{"version": 2, "files": {"0": '
                                     '{"uid": "0", "filename": '
                                     '"0", "metadata": {"clusters": '
                                     '[[3, 512, 18]]}}}, "module": "fat-file-slack"}'))
    for img_path in testfs_fat_stable1:
        # write metadata
        args = ["fishy", "-d", img_path, "fileslack", "-w", "-d", "another",
                "-m", metadata_file]
        sys.argv = args
        with io.BufferedRandom(io.BytesIO()) as patch_buffer:
            # save real stdin before monkey patching it
            real_stdin = sys.stdin
            sys.stdin = patch_buffer
            sys.stdin.buffer = patch_buffer
            sys.stdin.write(teststring.encode('utf-8'))
            patch_buffer.seek(0)
            cli.main()
            # restore real stdin
            sys.stdin = real_stdin
        # compare the written metadata against the expected document
        with open(metadata_file) as metaf:
            metafcontent = metaf.read()
        assert metafcontent == expected
    # remove testfiles
    os.remove(metadata_file)
def test_stream_process_output(self):
    """
    Process output stream does not buffer
    """
    expected = "\n".join([str(n) for n in range(0, 10)])
    stream = io.BytesIO()
    buf = io.BufferedRandom(stream)
    p = processhandler.ProcessHandler(
        [self.python, os.path.join("scripts", "proccountfive.py")],
        cwd=here,
        stream=buf,
    )
    p.run()
    p.wait()
    for i in range(5, 10):
        stream.write(str(i).encode("utf8") + "\n".encode("utf8"))
    buf.flush()
    # assertEquals is deprecated; use assertEqual
    self.assertEqual(stream.getvalue().strip().decode("utf8"), expected)
    # make sure mozprocess doesn't close the stream
    # since mozprocess didn't create it
    self.assertFalse(buf.closed)
    buf.close()
    self.determine_status(p, False, ())
def _open(
    self,
    path,
    mode="rb",
    block_size=None,
    autocommit=True,
    cache_options=None,
    **kwargs,
):
    # e.g. render://host/owner/project/stack/[c0_]z1002_y3389_x10193_h2048_w2048_s1.tif
    host, owner, project, stack, file = self._strip_protocol(path).split(self.sep)
    try:
        c, z, y, x, h, w, s, f = re.match(
            r"c(.*)_z(\d+)_y(\d+)_x(\d+)_h(\d+)_w(\d+)_s(.*)\.(.*)", file
        ).groups()
    except AttributeError:
        z, y, x, h, w, s, f = re.match(
            r"z(\d+)_y(\d+)_x(\d+)_h(\d+)_w(\d+)_s(.*)\.(.*)", file
        ).groups()
        c = None
    f = f.lower()
    if f == "tif":
        f += "f"
    uri = (
        f"http://{host}:8080/render-ws/v1/owner/{owner}/project/{project}"
        f"/stack/{stack}/z/{z}/box/{x},{y},{w},{h},{s}/{f}-image"
    )
    if c is not None:
        uri += f"?channels={c}"
    rq = requests.get(uri, stream=True)
    return io.BufferedRandom(io.BytesIO(rq.content))
def test_write_from_stdin(self, testfs_fat_stable1):
    """
    Test if writing from stdin into bad clusters works
    """
    teststring = "Small test for CLI"
    metadata_file = tempfile.NamedTemporaryFile().name
    for img_path in testfs_fat_stable1:
        # write metadata
        args = ["fishy", "-d", img_path, "badcluster", "-w", "-m",
                metadata_file]
        sys.argv = args
        with io.BufferedRandom(io.BytesIO()) as patch_buffer:
            # save real stdin before monkey patching it
            real_stdin = sys.stdin
            sys.stdin = patch_buffer
            sys.stdin.buffer = patch_buffer
            sys.stdin.write(teststring.encode('utf-8'))
            patch_buffer.seek(0)
            cli.main()
            # restore real stdin
            sys.stdin = real_stdin
        # check that exactly one file is recorded in the metadata
        with open(metadata_file) as metaf:
            metafcontent = json.loads(metaf.read())
        filecount = len(metafcontent['files'])
        assert filecount == 1
    # remove testfiles
    os.remove(metadata_file)
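The two fishy tests above share a stdin-swap pattern: a seekable BufferedRandom over a BytesIO stands in for sys.stdin so the CLI can read "piped" bytes. A runnable sketch of just that pattern, with consume_stdin() as a hypothetical stand-in for cli.main():

import io
import sys

def consume_stdin():
    # hypothetical stand-in for the CLI entry point under test
    return sys.stdin.read()

fake_stdin = io.BufferedRandom(io.BytesIO())
fake_stdin.write(b"Small test for CLI")
fake_stdin.seek(0)
real_stdin = sys.stdin
try:
    sys.stdin = fake_stdin
    assert consume_stdin() == b"Small test for CLI"
finally:
    sys.stdin = real_stdin  # always restore the real stdin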
def excel_data_source(self, data_source):
    """Get a data_source out of an Excel file."""
    # TODO: Need to flesh out this implementation quite a bit. See messytables
    instream = None
    if compat.urlparse(data_source).scheme in self.REMOTE_SCHEMES:
        instream = self._stream_from_url(data_source).read()
    else:
        try:
            data_source.seek(0)
            instream = data_source.read()
        except AttributeError:
            if os.path.exists(data_source):
                pass
            else:
                msg = 'data source has to be a stream or a path to be processed as excel'
                raise exceptions.DataSourceMalformatedError(
                    msg, file_format='excel')
    try:
        if instream:
            workbook = xlrd.open_workbook(file_contents=instream)
        else:
            workbook = xlrd.open_workbook(data_source)
    except xlrd.biffh.XLRDError as e:
        raise exceptions.DataSourceMalformatedError(msg=e.args[0],
                                                    file_format='excel')
    out = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()),
                           encoding=self.DEFAULT_ENCODING)
    sheet = workbook.sheet_by_index(self.excel_sheet_index)
    row_count = sheet.nrows
    for row_index in range(row_count):
        values = []
        for cell in sheet.row(row_index):
            # TODO: this is a very naive coercion to string
            if cell.ctype == 3:
                try:
                    value = datetime.datetime(*xlrd.xldate_as_tuple(
                        cell.value, sheet.book.datemode)).isoformat()
                except xlrd.xldate.XLDateError as e:
                    raise exceptions.DataSourceMalformatedError(
                        msg=e.args[0], file_format='excel')
            else:
                value = cell.value
            values.append(compat.str(value))
        _data = ','.join('"{0}"'.format(v) for v in values)
        out.write('{0}\n'.format(_data))
    out.seek(0)
    return out
def make_stream(name, f, mode='r', buffering=-1, encoding=None, errors=None,
                newline=None, line_buffering=False, **kwargs):
    """Take a Python 2.x binary file and return an IO stream."""
    r, w, a, binary = 'r' in mode, 'w' in mode, 'a' in mode, 'b' in mode
    if '+' in mode:
        r, w = True, True
    io_object = RawWrapper(f, mode=mode, name=name)
    if buffering >= 0:
        if r and w:
            io_object = io.BufferedRandom(io_object,
                                          buffering or io.DEFAULT_BUFFER_SIZE)
        elif r:
            io_object = io.BufferedReader(io_object,
                                          buffering or io.DEFAULT_BUFFER_SIZE)
        elif w:
            io_object = io.BufferedWriter(io_object,
                                          buffering or io.DEFAULT_BUFFER_SIZE)
    if not binary:
        io_object = io.TextIOWrapper(io_object,
                                     encoding=encoding,
                                     errors=errors,
                                     newline=newline,
                                     line_buffering=line_buffering)
    return io_object
def load(self, source, mode='t', encoding=None):
    # Prepare source
    source = helpers.requote_uri(source)
    # Prepare bytes
    try:
        bytes = _RemoteStream(source, self.__http_session,
                              self.__http_timeout).open()
        if not self.__http_stream:
            buffer = io.BufferedRandom(io.BytesIO())
            buffer.write(bytes.read())
            buffer.seek(0)
            bytes = buffer
        if self.__stats:
            bytes = helpers.BytesStatsWrapper(bytes, self.__stats)
    except IOError as exception:
        raise exceptions.HTTPError(str(exception))
    # Return bytes
    if mode == 'b':
        return bytes
    # Detect encoding
    if self.__bytes_sample_size:
        sample = bytes.read(self.__bytes_sample_size)[:self.__bytes_sample_size]
        bytes.seek(0)
        encoding = helpers.detect_encoding(sample, encoding)
    # Prepare chars
    chars = io.TextIOWrapper(bytes, encoding)
    return chars
def load(self, source, mode='t', encoding=None):
    # Prepare source
    source = helpers.requote_uri(source)
    # Prepare bytes
    try:
        parts = urlparse(source, allow_fragments=False)
        response = self.__s3_client.get_object(Bucket=parts.netloc,
                                               Key=parts.path[1:])
        # https://github.com/frictionlessdata/tabulator-py/issues/271
        bytes = io.BufferedRandom(io.BytesIO())
        bytes.write(response['Body'].read())
        bytes.seek(0)
    except Exception as exception:
        raise exceptions.IOError(str(exception))
    # Return bytes
    if mode == 'b':
        return bytes
    # Detect encoding
    if self.__bytes_sample_size:
        sample = bytes.read(self.__bytes_sample_size)
        bytes.seek(0)
        encoding = helpers.detect_encoding(sample, encoding)
    # Prepare chars
    chars = io.TextIOWrapper(bytes, encoding)
    return chars
def load(self, source, mode='t', encoding=None, allow_zip=False):
    # Requote uri
    source = helpers.requote_uri(source)
    # Prepare bytes
    try:
        if six.PY2:
            response = urlopen(source)
            bytes = io.BufferedRandom(io.BytesIO())
            bytes.write(response.read())
            bytes.seek(0)
        else:
            bytes = _WebStream(source)
            response = bytes.response
        sample = bytes.read(self.__bytes_sample_size)
        bytes.seek(0)
    except URLError as exception:
        raise exceptions.HTTPError(str(exception))
    if not allow_zip:
        if helpers.detect_zip(sample):
            message = 'Format has been detected as ZIP (not supported)'
            raise exceptions.FormatError(message)
    # Return or raise
    if mode == 'b':
        return bytes
    else:
        encoding = helpers.detect_encoding(sample, encoding)
        chars = io.TextIOWrapper(bytes, encoding)
        return chars
def load(self, source, encoding, mode):
    # Requote uri
    source = helpers.requote_uri(source)
    # Prepare bytes
    if six.PY2:
        response = urlopen(source)
        bytes = io.BufferedRandom(io.BytesIO())
        bytes.write(response.read())
        bytes.seek(0)
    else:
        bytes = _WebStream(source)
        response = bytes.response
    # Prepare encoding
    if encoding is None:
        if six.PY2:
            encoding = response.headers.getparam('charset')
        else:
            encoding = response.headers.get_content_charset()
        encoding = helpers.detect_encoding(bytes, encoding)
    # Return or raise
    if mode == 'b':
        return bytes
    elif mode == 't':
        chars = io.TextIOWrapper(bytes, encoding, **self.__options)
        return chars
    else:
        message = 'Mode %s is not supported' % mode
        raise exceptions.LoadingError(message)
def load(self, source, encoding, mode):
    # Prepare source
    scheme = 'text://'
    if source.startswith(scheme):
        source = source.replace(scheme, '', 1)
    # Prepare encoding
    if encoding is None:
        encoding = helpers.DEFAULT_ENCODING
    # Prepare bytes
    bytes = io.BufferedRandom(io.BytesIO())
    bytes.write(source.encode(encoding))
    bytes.seek(0)
    # Return or raise
    if mode == 'b':
        return bytes
    elif mode == 't':
        chars = io.TextIOWrapper(bytes, encoding, **self.__options)
        return chars
    else:
        message = 'Mode %s is not supported' % mode
        raise exceptions.LoadingError(message)
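The loaders above all rely on the same trick: copy a possibly non-seekable body into BufferedRandom(BytesIO()) so callers can read a sample for encoding or format detection and then rewind for the real parse. Reduced to a standalone sketch (make_seekable is an illustrative name, not tabulator API):

import io

def make_seekable(body):
    # body: any readable binary stream (HTTP response, S3 object, ...)
    buffer = io.BufferedRandom(io.BytesIO())
    buffer.write(body.read())
    buffer.seek(0)
    return buffer

stream = make_seekable(io.BytesIO(b"id,name\n1,english\n"))
sample = stream.read(1024)  # peek for encoding/format detection
stream.seek(0)              # rewind so parsing starts from byte 0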
def __init__(
        self,
        raw: IO,
        buffer_size: Optional[int] = io.DEFAULT_BUFFER_SIZE,
        *,
        loop: Loop = asyncio.get_event_loop()) -> None:
    self._loop = loop
    # isinstance expects a tuple of types, not separate arguments
    if isinstance(raw, (AsyncFileIO, AsyncTextIOWrapper)):
        raw = raw._hidden
    self._hidden = io.BufferedRandom(raw, buffer_size)
def read_byte_stream_create(self): scheme = "text://" source = self.file.source if source.startswith(scheme): source = source.replace(scheme, "", 1) byte_stream = io.BufferedRandom(io.BytesIO()) byte_stream.write(source.encode(config.DEFAULT_ENCODING)) byte_stream.seek(0) return byte_stream
def testReadAndWrite(self):
    raw = MockRawIO((b"asdf", b"ghjk"))
    # the third positional max_buffer_size argument was removed in Python 3
    rw = io.BufferedRandom(raw, 8)
    self.assertEqual(b"as", rw.read(2))
    rw.write(b"ddd")
    rw.write(b"eee")
    self.assertFalse(raw._write_stack)  # writes are still buffered
    self.assertEqual(b"ghjk", rw.read())  # this read forces a write flush
    self.assertEqual(b"dddeee", raw._write_stack[0])
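The CPython test above depends on the suite's MockRawIO; an approximation with a plain BytesIO shows the same behavior, assuming default buffering semantics: writes sit in BufferedRandom's buffer until a flush (or an intervening read/seek) pushes them to the raw stream at the correct logical offset.

import io

raw = io.BytesIO(b"asdfghjk")
rw = io.BufferedRandom(raw, buffer_size=8)
assert rw.read(2) == b"as"   # read-ahead may pull more from raw
rw.write(b"dddeee")          # buffered; raw is not touched yet
rw.flush()                   # written back at logical offset 2
assert raw.getvalue() == b"asdddeee"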
def test_readline(self):
    import _io as io
    with io.BytesIO(b"abc\ndef\nxyzzy\nfoo\x00bar\nanother line") as raw:
        with io.BufferedRandom(raw, buffer_size=10) as f:
            assert f.readline() == b"abc\n"
            assert f.readline(10) == b"def\n"
            assert f.readline(2) == b"xy"
            assert f.readline(4) == b"zzy\n"
            assert f.readline() == b"foo\x00bar\n"
            assert f.readline(None) == b"another line"
            raises(TypeError, f.readline, 5.3)
def read_byte_stream_create(self):
    fullpath = requests.utils.requote_uri(self.resource.fullpath)
    session = self.resource.control.http_session
    timeout = self.resource.control.http_timeout
    byte_stream = RemoteByteStream(fullpath, session=session,
                                   timeout=timeout).open()
    if self.resource.control.http_preload:
        buffer = io.BufferedRandom(io.BytesIO())
        buffer.write(byte_stream.read())
        buffer.seek(0)
        byte_stream = buffer
    return byte_stream
def read_byte_stream_create(self): scheme = "text://" text = self.resource.fullpath if text.startswith(scheme): text = text.replace(scheme, "", 1) if text.endswith(f".{self.resource.format}"): text = text[: -(len(self.resource.format) + 1)] byte_stream = io.BufferedRandom(io.BytesIO()) byte_stream.write(text.encode(config.DEFAULT_ENCODING)) byte_stream.seek(0) return byte_stream
def read_byte_stream_create(self):
    boto3 = helpers.import_from_plugin("boto3", plugin="aws")
    control = self.file.control
    client = boto3.client("s3", endpoint_url=control.endpoint_url)
    source = requests.utils.requote_uri(self.file.source)
    parts = urlparse(source, allow_fragments=False)
    response = client.get_object(Bucket=parts.netloc, Key=parts.path[1:])
    # https://github.com/frictionlessdata/tabulator-py/issues/271
    byte_stream = io.BufferedRandom(io.BytesIO())
    byte_stream.write(response["Body"].read())
    byte_stream.seek(0)
    return byte_stream
def test_interleaved_readline_write(self):
    import _io as io
    with io.BytesIO(b'ab\ncdef\ng\n') as raw:
        with io.BufferedRandom(raw) as f:
            f.write(b'1')
            assert f.readline() == b'b\n'
            f.write(b'2')
            assert f.readline() == b'def\n'
            f.write(b'3')
            assert f.readline() == b'\n'
            f.flush()
            assert raw.getvalue() == b'1b\n2def\n3\n'
def test_unpickling_buffering_readline(self):
    # Issue #12687: the unpickler's buffering logic could fail with
    # text mode opcodes.
    data = list(range(10))
    for proto in protocols:
        for buf_size in range(1, 11):
            f = io.BufferedRandom(io.BytesIO(), buffer_size=buf_size)
            pickler = self.pickler_class(f, protocol=proto)
            pickler.dump(data)
            f.seek(0)
            unpickler = self.unpickler_class(f)
            self.assertEqual(unpickler.load(), data)
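A trimmed restatement of the regression above as a standalone snippet; a tiny buffer_size forces BufferedRandom to refill often while the unpickler reads:

import io
import pickle

data = list(range(10))
f = io.BufferedRandom(io.BytesIO(), buffer_size=4)
pickle.dump(data, f)  # write the pickle through the small buffer
f.seek(0)             # flushes pending writes and rewinds
assert pickle.load(f) == data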
def __init__(self, argv):
    signal.signal(signal.SIGINT, self.sigint)
    self.file = argv[1]
    self.readonly = False
    if self.file.startswith('ip:'):
        ip = tuple(self.file.split(':')[1:])
        print(ip)
        self.fd = socket.create_connection(self.file.split(':')[1:])
        self.io = self.fd.makefile('rw')
    else:
        if stat.S_ISCHR(os.stat(self.file).st_mode):
            self.fd = serial.Serial(self.file, 115200, timeout=1)
            self.io = io.TextIOWrapper(io.BufferedRandom(self.fd))
        else:
            self.fd = io.FileIO(self.file, 'r')
            self.io = io.TextIOWrapper(io.BufferedReader(self.fd))
            self.readonly = True
    self.leftbox = urwid.ListBox(
        urwid.SimpleFocusListWalker([urwid.Text('')]))
    self.rightbox = urwid.ListBox(
        urwid.SimpleFocusListWalker([urwid.Text(''), urwid.Text('')]))
    self.column = urwid.Columns([self.leftbox, self.rightbox])
    self.header = urwid.AttrMap(urwid.Text('CAN Bus data stats'), 'header')
    self.status_icons = urwid.WidgetPlaceholder(urwid.Text(''))
    self.status_packets = urwid.WidgetPlaceholder(urwid.Text(''))
    self.footer = urwid.AttrMap(
        urwid.Columns([self.status_icons, self.status_packets]), 'footer')
    self.frame = urwid.PopUpTarget(
        urwid.Frame(self.column, header=self.header, footer=self.footer))
    self.statline = []
    self.c_packets = 0
    self.errors = 0
    self.packets = []
    self.packets_by_id = dictlist()
    self.packet_last_time = {}
    self.packet_avg_times = {}
    self.ids = Counter()
    self.classes = Counter()
    self.event_loop = urwid.AsyncioEventLoop()
    self.main_loop = urwid.MainLoop(self.frame, event_loop=self.event_loop,
                                    unhandled_input=self.handle_key)
    self.watch_file_handle = None
    self.toggle_watch()
    self.event_loop.alarm(0.25, self.update_display)
    if self.readonly:
        self.update_statline('RO')
def make_stream(
    name,  # type: Text
    bin_file,  # type: RawIOBase
    mode="r",  # type: Text
    buffering=-1,  # type: int
    encoding=None,  # type: Optional[Text]
    errors=None,  # type: Optional[Text]
    newline="",  # type: Optional[Text]
    line_buffering=False,  # type: bool
    **kwargs  # type: Any
):
    # type: (...) -> IO
    """Take a Python 2.x binary file and return an IO Stream.
    """
    reading = "r" in mode
    writing = "w" in mode
    appending = "a" in mode
    binary = "b" in mode
    if "+" in mode:
        reading = True
        writing = True
    encoding = None if binary else (encoding or "utf-8")
    io_object = RawWrapper(bin_file, mode=mode, name=name)  # type: io.IOBase
    if buffering >= 0:
        if reading and writing:
            io_object = io.BufferedRandom(
                typing.cast(io.RawIOBase, io_object),
                buffering or io.DEFAULT_BUFFER_SIZE,
            )
        elif reading:
            io_object = io.BufferedReader(
                typing.cast(io.RawIOBase, io_object),
                buffering or io.DEFAULT_BUFFER_SIZE,
            )
        elif writing or appending:
            io_object = io.BufferedWriter(
                typing.cast(io.RawIOBase, io_object),
                buffering or io.DEFAULT_BUFFER_SIZE,
            )
    if not binary:
        io_object = io.TextIOWrapper(
            io_object,
            encoding=encoding,
            errors=errors,
            newline=newline,
            line_buffering=line_buffering,
        )
    return io_object
def open(self, path, mode, buffer_size=1024, encoding=None):
    mode2 = mode if "b" in mode else (mode.replace("t", "") + "b")
    file = LocalFile(self.client, path, mode2)
    if file.readable() and file.writable():
        file = io.BufferedRandom(file, buffer_size)
    elif file.readable():
        file = io.BufferedReader(file, buffer_size)
    elif file.writable():
        file = io.BufferedWriter(file, buffer_size)
    if encoding:
        return io.TextIOWrapper(file, encoding=encoding)
    else:
        return file
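The readable()/writable() dispatch above generalizes beyond the project's LocalFile type. A stdlib-only sketch of the same wrapper selection (an analogue for illustration, not the library's API):

import io

def open_buffered(path, mode="rb", buffer_size=1024):
    raw = io.FileIO(path, mode.replace("b", ""))  # FileIO modes omit 'b'
    if raw.readable() and raw.writable():
        return io.BufferedRandom(raw, buffer_size)
    if raw.readable():
        return io.BufferedReader(raw, buffer_size)
    return io.BufferedWriter(raw, buffer_size)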
def _sample_stream(self, row_limit=100):
    sample = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()),
                              encoding=self.DEFAULT_ENCODING)
    for index, row in enumerate(self.stream):
        if index > row_limit:
            break
        else:
            sample.write(row)
    self.stream.seek(0)
    sample.seek(0)
    return sample
def test_standalone_report_stream_valid(self):
    filepath = os.path.join(self.data_dir, 'valid.csv')
    report_stream = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))
    with io.open(filepath) as stream:
        validator = processors.SchemaProcessor(report_stream=report_stream)
        result, report, data = validator.run(stream)
        self.assertEqual(len(report.generate()['results']), 0)
        report_stream.seek(0)
        for line in report_stream:
            self.assertTrue(json.loads(line.rstrip('\n')))
def test_pipeline_report_stream_valid(self):
    filepath = os.path.join(self.data_dir, 'valid.csv')
    report_stream = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))
    options = {}
    validator = Pipeline(filepath, processors=('schema',),
                         report_stream=report_stream, options=options)
    result, report = validator.run()
    self.assertEqual(len(report.generate()['results']), 0)
    report_stream.seek(0)
    for line in report_stream:
        self.assertTrue(json.loads(line.rstrip('\n')))
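The report_stream idiom in the two tests above, reduced to its core: TextIOWrapper over BufferedRandom(BytesIO()) gives a writable, seekable, in-memory text stream whose lines can be replayed.

import io
import json

report_stream = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))
report_stream.write(json.dumps({"result": "ok"}) + "\n")
report_stream.seek(0)  # flush and rewind before replaying
for line in report_stream:
    assert json.loads(line.rstrip("\n")) == {"result": "ok"}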
def _stream_from_url(self, url):
    """Return a seekable and readable stream from a URL."""
    stream = io.BufferedRandom(io.BytesIO())
    valid_url = helpers.make_valid_url(url)
    try:
        document = compat.urlopen(valid_url)
    except compat.HTTPError as e:
        raise exceptions.DataSourceHTTPError(status=e.getcode())
    stream.write(document.read())
    stream.seek(0)
    return stream
def make_stream(name,
                bin_file,
                mode='r',
                buffering=-1,
                encoding=None,
                errors=None,
                newline='',
                line_buffering=False,
                **kwargs):
    """Take a Python 2.x binary file and return an IO Stream."""
    reading = 'r' in mode
    writing = 'w' in mode
    appending = 'a' in mode
    binary = 'b' in mode
    if '+' in mode:
        reading = True
        writing = True
    encoding = None if binary else (encoding or 'utf-8')
    io_object = RawWrapper(bin_file, mode=mode, name=name)
    if buffering >= 0:
        if reading and writing:
            io_object = io.BufferedRandom(
                io_object, buffering or io.DEFAULT_BUFFER_SIZE
            )
        elif reading:
            io_object = io.BufferedReader(
                io_object, buffering or io.DEFAULT_BUFFER_SIZE
            )
        elif writing or appending:
            io_object = io.BufferedWriter(
                io_object, buffering or io.DEFAULT_BUFFER_SIZE
            )
    if not binary:
        io_object = io.TextIOWrapper(
            io_object,
            encoding=encoding,
            errors=errors,
            newline=newline,
            line_buffering=line_buffering,
        )
    return io_object
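In stdlib terms, the make_stream variants above build the familiar three-layer stack: raw binary file, buffered wrapper, optional text layer. A minimal sketch of that composition, assuming a scratch file on disk (the path is illustrative):

import io

raw = io.FileIO("scratch.tmp", "w+")  # hypothetical scratch file
buffered = io.BufferedRandom(raw, io.DEFAULT_BUFFER_SIZE)
text = io.TextIOWrapper(buffered, encoding="utf-8", newline="")
text.write("hello\n")
text.seek(0)
assert text.read() == "hello\n"
text.close()  # closing the top layer closes all three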