def load_dataset(setdir, dataset):
    setid = SetID(dataset.setid)
    files = [{'path': x.path,
              'missing': bool(x.missing),
              'mtime': x.mtime * 10**6,
              'size': x.size}
             for x in sorted(dataset.files, key=lambda x: x.idx)]
    dct = {'id': setid,
           'name': dataset.name,
           'files': files,
           'time_added': dataset.time_added * 10**6,
           'timestamp': dataset.timestamp * 10**6}
    try:
        wrapper = Wrapper.from_dict(Dataset, dct)
    except KjException as e:
        from pprint import pformat
        print('Schema violation for %s with data:\n%s\nschema: %s'
              % (Dataset.schema.node.displayName, pformat(dct), Dataset.schema.node.displayName),
              file=sys.stderr)
        raise e
    wrapper._setdir = setdir  # needed by dataset.load(node)
    return [wrapper]
def make_file(self, name):
    assert not self.ended
    assert not self.group, (self, name)
    path = os.path.join(self.streamdir, name)
    fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
    os.close(fd)  # shall we pass on the open handle?
    # TODO: consider storing path relative to setdir instead of name
    file = File.new_message(path=path).as_reader()
    wrapper = Wrapper(file, self.streamdir, self.setdir)
    self.logdebug('made file %r', wrapper)
    return wrapper
def push(msg):
    schema = NODE_SCHEMA.get()
    if schema is not None and not isinstance(msg, Wrapper):
        try:
            msg = Wrapper.from_dict(schema, msg)
        except KjException:
            from pprint import pformat  # pylint: disable=import-outside-toplevel
            _node = schema.schema.node
            err(f'Schema violation for {_node.displayName} with data:\n'
                f'{pformat(msg)}\nschema: {_node.displayName}')
            raise
    return Push(msg)
def __init__(self, prehandle, streamdir, setdir, msgs=None, info=None):
    header = info['header'] if info else {}
    handle = Handle(*prehandle.key, group=prehandle.group, header=header)
    self.handle = handle
    self.cache = deque([Msg(idx=-1, handle=handle, data=handle)], self.CACHESIZE)
    if handle.group:
        self.stream = iter([
            Handle(handle.setid, handle.node, x['name'],
                   group=bool(x['streams']), header=x['header'])
            for x in info['streams'].values()
        ])
    elif msgs:
        self.stream = (Wrapper(x._reader, streamdir, setdir) for x in msgs)
    else:
        assert not info['streams']
        path = os.path.join(streamdir, '{}-stream'.format(handle.name))
        streamfile = open(path, 'rb')
        self.streamfile = streamfile
        self.stream = (Wrapper(x, streamdir, setdir)
                       for x in handle.node.schema.read_multiple_packed(streamfile))
def load(self, setdir, default=()):  # TODO: this is store, not node!
    from marv_pycapnp import Wrapper
    # TODO: handle substream fun
    nodedir = os.path.join(setdir, self.name)
    try:
        # packed capnp messages are binary; open the stream file in binary mode
        with open(os.path.join(nodedir, 'default-stream'), 'rb') as f:
            msgs = self.schema.read_multiple_packed(f)
            return [Wrapper(x, None, setdir) for x in msgs]
    except IOError:
        if default is not ():
            return default
        raise
def load(self, setdir, node=None, nodename=None, default=()):
    assert bool(node) != bool(nodename)
    assert nodename != 'dataset'
    assert node.name != 'dataset'
    # TODO: handle substream fun
    name = nodename or self.name_by_node.get(node, node.name)
    nodedir = os.path.join(setdir, name)
    try:
        # packed capnp messages are binary; open the stream file in binary mode
        with open(os.path.join(nodedir, 'default-stream'), 'rb') as f:
            msgs = node.schema.read_multiple_packed(f)
            return [Wrapper(x, None, setdir) for x in msgs]
    except IOError:
        if default is not ():
            return default
        raise
def load_dataset(setdir, dataset):  # pylint: disable=redefined-outer-name
    setid = SetID(dataset.setid)
    files = [{'path': x.path,
              'missing': bool(x.missing),
              'mtime': x.mtime * 10**6,
              'size': x.size}
             for x in sorted(dataset.files, key=lambda x: x.idx)]
    dct = {'id': setid,
           'name': dataset.name,
           'files': files,
           'time_added': dataset.time_added * 10**6,
           'timestamp': dataset.timestamp * 10**6}
    try:
        wrapper = Wrapper.from_dict(Dataset, dct, setdir=setdir)
    except KjException as e:
        from pprint import pformat  # pylint: disable=import-outside-toplevel
        err('Schema violation for %s with data:\n%s\nschema: %s' % (
            Dataset.schema.node.displayName, pformat(dct), Dataset.schema.node.displayName))
        raise e
    return [wrapper]
def msg(self, __msg=None, _schema=NOTSET, **kw):
    from .io import TheEnd
    assert (__msg is not None) ^ bool(kw), (__msg, kw)
    data = kw if __msg is None else __msg
    if self.group:
        assert isinstance(data, (Handle, TheEnd)), (self, data)
    elif not isinstance(data, (Wrapper, Handle, TheEnd)):
        if _schema is NOTSET:
            from marv_api.ioctrl import NODE_SCHEMA  # pylint: disable=import-outside-toplevel
            schema = NODE_SCHEMA.get()
        else:
            schema = _schema
        if schema is not None:
            try:
                data = Wrapper.from_dict(schema, data)
            except KjException:
                from pprint import pformat  # pylint: disable=import-outside-toplevel
                _node = schema.schema.node
                err(f'Schema violation for {_node.displayName} with data:\n'
                    f'{pformat(data)}\nschema: {_node.displayName}')
                raise
    return Msg(next(self._counter), self, data)
def test_file_wrapper():
    wrapper = Wrapper.from_dict(TestStruct, {})
    with pytest.raises(AttributeError):
        assert wrapper.path
    with pytest.raises(AttributeError):
        assert wrapper.relpath

    wrapper = Wrapper.from_dict(File, {'path': '/foo'}, setdir=Path(__file__).parent.parent)
    assert wrapper.path == '/foo'
    with pytest.raises(AttributeError):
        assert wrapper.relpath

    wrapper = Wrapper.from_dict(File, {'path': __file__},
                                setdir=Path(__file__).parent.parent,
                                streamdir=Path(__file__).parent)
    assert wrapper.path == __file__

    wrapper = Wrapper.from_dict(File, {'path': '/path/to/setdir/streamdir/file'},
                                setdir='/path/to/moved/setdir', streamdir='/irrelevant')
    assert wrapper.path == '/path/to/moved/setdir/streamdir/file'

    wrapper = Wrapper.from_dict(File, {'path': '/path/to/setdir/.streamdir/file'},
                                setdir='/path/to/moved/setdir', streamdir='/irrelevant')
    assert wrapper.path == '/path/to/moved/setdir/streamdir/file'
    assert wrapper.relpath == 'streamdir/file'

    # Moved, but old path exists, i.e. copied
    # The last component of setdir is the setid, which usually is a random hash and looked for
    # in the stored path to return the new path.
    wrapper = Wrapper.from_dict(
        File, {'path': __file__},
        setdir=f'/path/to/moved/{Path(__file__).parent.name}', streamdir='/irrelevant')
    assert wrapper.path == f'/path/to/moved/{Path(__file__).parent.name}/{Path(__file__).name}'
def make_map_dict(dct):
    mapdct = Wrapper.from_dict(Widget.Map, dct)._reader.to_dict(verbose=True, which=True)
    fixup_map(mapdct)
    return mapdct
def make_map_dict(dct):
    reader = Wrapper.from_dict(Widget.Map, dct)._reader  # pylint: disable=protected-access
    mapdct = reader.to_dict(verbose=True, which=True)
    fixup_map(mapdct)
    return mapdct
def _run(self):
    self.started = True
    gen = self.node.invoke(self.inputs)
    assert hasattr(gen, 'send'), gen
    yield  # Wait for start signal before returning anything notable
    yield self.stream

    request_counter = count()
    msg_request_counter = defaultdict(count)
    next_msg_index_counter = defaultdict(int)
    send = None
    finished = False
    while not finished:
        try:
            request = gen.send(send)
            self.logdebug('got from node %s', type(request))
            request_idx = request_counter.next()
        except StopIteration:
            finished = True
        except Abort:
            finished = True

        if finished:
            for stream in self.streams.values():
                if len(stream.cache) == 0:
                    yield stream.handle.msg(stream.handle)
                if len(stream.cache) == 1:
                    self.logdebug('finishing empty stream')
                if not stream.ended:
                    yield stream.handle.msg(THEEND)
            break

        output = None
        # preprocess
        # TODO:
        # - yield out.msg() valid? or enforce
        # - yield marv.push(out.msg(foo)) or
        # - yield marv.push(foo, out)
        if isinstance(request, Push):
            output = request.output
        elif isinstance(request, (Msg, Wrapper)):
            output = request

        # process
        if output:
            if len(self.stream.cache) == 0:
                yield self.stream.handle.msg(self.stream.handle)

            # With first output, stream creation is done
            if self.stream_creation:
                self.stream_creation = False

            msg = output
            if not isinstance(msg, Msg):
                msg = self.stream.handle.msg(msg)
            else:
                assert msg.handle.node is self.node
                assert msg.handle.setid == self.setid

            schema = self.node.schema
            # TODO: handles should not be published by all?
            # this got introduced for merging streams
            if isinstance(msg._data, Handle):
                # TODO: check that stream we are publishing to is a Group
                assert self.node.group, self.node

            if schema is not None and \
                    not isinstance(msg._data, (Wrapper, Handle, TheEnd)):
                try:
                    msg._data = Wrapper.from_dict(schema, msg._data)
                except KjException as e:
                    from pprint import pformat
                    self.logerror(
                        'Schema violation for %s with data:\n%s\nschema: %s',
                        schema.schema.node.displayName, pformat(msg._data),
                        schema.schema.node.displayName)
                    raise e

            signal = yield msg
            assert signal in (NEXT, RESUME), signal
            continue

        elif isinstance(request, Pull):
            handle = request.handle
            # if request.skip is never used the two indices remain the same
            msg_req_idx = msg_request_counter[handle].next()
            next_msg_idx = next_msg_index_counter[handle]  # + request.skip
            next_msg_index_counter[handle] = next_msg_idx + 1
            msg = yield MsgRequest(handle, next_msg_idx, self)
            assert isinstance(msg, Msg), msg
            assert msg.idx == next_msg_idx
            # TODO: switch to DONE
            send = None if msg.data is THEEND else msg.data
            if request.enumerate:
                send = (msg_req_idx, send)
            continue

        elif isinstance(request, PullAll):
            send = []
            for handle in request.handles:
                msg_req_idx = msg_request_counter[handle].next()
                next_msg_idx = next_msg_index_counter[handle]
                next_msg_index_counter[handle] = next_msg_idx + 1
                msg = yield MsgRequest(handle, next_msg_idx, self)
                assert isinstance(msg, Msg), msg
                assert msg.idx == next_msg_idx
                # TODO: switch to DONE
                send.append(None if msg.data is THEEND else msg.data)
            continue

        elif isinstance(request, SetHeader):
            # TODO: should this be explicitly allowed/required?
            # Handles for non-header streams would be created right away
            assert len(self.stream.cache) == 0
            self.stream.handle.header = request.header.copy()
            yield self.stream.handle.msg(self.stream.handle)
            continue

        elif isinstance(request, MakeFile):
            if len(self.stream.cache) == 0:
                yield self.stream.handle.msg(self.stream.handle)
            stream = self.streams[request.handle or self.stream.handle]
            try:
                make_file = stream.make_file
            except AttributeError:
                raise MakeFileNotSupported(stream)
            send = make_file(request.name)
            continue

        elif isinstance(request, Fork):
            parent_handle = self.stream.handle
            parent = self.streams[parent_handle]
            stream = parent.create_stream(name=request.name, group=request.group)
            fork = type(self)(stream, inputs=request.inputs)
            if len(self.stream.cache) == 0:
                yield self.stream.handle.msg(self.stream.handle)
            yield fork
            send = None  # TODO: What should we send back?
            continue

        elif isinstance(request, GetStream):
            handle = Handle(request.setid or self.setid, request.node, request.name)
            msg = yield MsgRequest(handle, -1, self)
            send = msg.data
            assert send == handle, (send, handle)
            continue  # TODO: why is this not covered?

        elif isinstance(request, CreateStream):
            kw = request._asdict()
            parent_handle = kw.pop('parent', None) or self.stream.handle
            parent = self.streams[parent_handle]
            stream = parent.create_stream(**kw)
            assert stream.name != 'default'
            assert stream.handle not in self.streams, stream
            self.streams[stream.handle] = stream
            if len(parent.cache) == 0:
                yield parent.handle.msg(parent.handle)
            if len(self.stream.cache) == 0:
                yield self.stream.handle.msg(self.stream.handle)
            yield stream
            yield stream.handle.msg(stream.handle)
            send = stream.handle
            continue

        elif isinstance(request, GetRequested):
            assert self.stream.group, (self, request)
            signal = yield PAUSED  # increase chances for completeness
            assert signal is RESUME
            send = list(self._requested_streams)
            self.stream_creation = False
            continue

        elif isinstance(request, GetLogger):
            send = getLogger('marv.node.{}'.format(self.key_abbrev))
            continue

        else:
            raise RuntimeError(
                'Unknown request number {}: {!r} from {!r}'.format(
                    request_idx + 1, request, self.node))

    self.stopped = True
def test():
    builder = TestStruct.new_message()
    reader = builder.as_reader()
    wrapper = Wrapper(reader, streamdir=None, setdir=None)
    assert repr(wrapper) == '<Wrapper marv_pycapnp.tests.test_wrapper_capnp:TestStruct>'

    builder.text = u'\u03a8'
    assert wrapper.text == u'\u03a8'
    assert isinstance(wrapper.text, unicode)

    builder.data = u'\u03a8'.encode('utf-8')
    assert wrapper.data == u'\u03a8'.encode('utf-8')
    assert isinstance(wrapper.data, str)

    builder.textList = [u'\u03a8']
    assert wrapper.text_list == [u'\u03a8']
    assert wrapper.text_list[:] == [u'\u03a8']
    assert list(wrapper.text_list) == [u'\u03a8']
    assert isinstance(wrapper.text_list[0], unicode)
    assert repr(wrapper.textList) == "[u'\\u03a8']"

    builder.dataList = [u'\u03a8'.encode('utf-8')]
    assert wrapper.data_list == [u'\u03a8'.encode('utf-8')]
    assert wrapper.data_list[:] == [u'\u03a8'.encode('utf-8')]
    assert list(wrapper.data_list) == [u'\u03a8'.encode('utf-8')]
    assert isinstance(wrapper.data_list[0], str)
    assert repr(wrapper.dataList) == "['\\xce\\xa8']"

    builder.textListInList = [[u'\u03a8'], [u'\u03a8']]
    builder.dataListInList = [[u'\u03a8'.encode('utf-8')], [u'\u03a8'.encode('utf-8')]]
    assert all(isinstance(x, unicode) for lst in wrapper.textListInList for x in lst)
    assert all(isinstance(x, str) for lst in wrapper.dataListInList for x in lst)

    nested = Wrapper.from_dict(schema=TestStruct, data={
        'text': u'\u03a8',
        'data': u'\u03a8'.encode('utf-8'),
        'textList': [u'\u03a8'],
        'dataList': [u'\u03a8'.encode('utf-8')],
        'textListInList': [[u'\u03a8'], [u'\u03a8']],
        'dataListInList': [[u'\u03a8'.encode('utf-8')], [u'\u03a8'.encode('utf-8')]],
    })
    builder.nestedList = [nested._reader]
    assert isinstance(wrapper.nested_list[0].text, unicode)
    assert isinstance(wrapper.nested_list[0].data, str)
    assert isinstance(wrapper.nested_list[0].textList[0], unicode)
    assert isinstance(wrapper.nested_list[0].dataList[0], str)

    builder.unionData = u'\u03a8'.encode('utf-8')
    assert isinstance(wrapper.union_data, str)

    builder.unionText = u'\u03a8'
    assert isinstance(wrapper.union_text, unicode)

    builder.union.data = u'\u03a8'.encode('utf-8')
    assert isinstance(wrapper.union.data, str)

    builder.union.text = u'\u03a8'
    assert isinstance(wrapper.union.text, unicode)

    builder.group.text = u'\u03a8'
    assert isinstance(wrapper.group.text, unicode)

    builder.group.data = u'\u03a8'.encode('utf-8')
    assert isinstance(wrapper.group.data, str)

    builder.enum = 'foo'
    assert wrapper.enum == 'foo'
    builder.enum = 'bar'
    assert wrapper.enum == 'bar'

    dct = wrapper.to_dict(which=True)
    assert dct == {
        'data': '\xce\xa8',
        'dataList': ['\xce\xa8'],
        'dataListInList': [['\xce\xa8'], ['\xce\xa8']],
        'enum': 'bar',
        'group': {
            'data': '\xce\xa8',
            'text': u'\u03a8',
        },
        'nestedList': [{
            'data': '\xce\xa8',
            'dataList': ['\xce\xa8'],
            'dataListInList': [['\xce\xa8'], ['\xce\xa8']],
            'enum': 'foo',
            'group': {
                'data': '',
                'text': u'',
            },
            'nestedList': [],
            'text': u'\u03a8',
            'textList': [u'\u03a8'],
            'textListInList': [[u'\u03a8'], [u'\u03a8']],
            'union': {
                'text': u'',
                '_which': 'text',
            },
            'unionText': u'',
            '_which': 'unionText',
        }],
        'text': u'\u03a8',
        'textList': [u'\u03a8'],
        'textListInList': [[u'\u03a8'], [u'\u03a8']],
        'union': {
            'text': u'\u03a8',
            '_which': 'text',
        },
        'unionText': u'\u03a8',
        '_which': 'unionText',
    }

    dct = wrapper.to_dict()
    roundtrip = Wrapper.from_dict(TestStruct, dct)
    assert dct == roundtrip.to_dict()
def test():  # pylint: disable=too-many-statements
    builder = TestStruct.new_message()
    reader = builder.as_reader()
    wrapper = Wrapper(reader, streamdir=None, setdir=None)
    assert repr(wrapper) == '<Wrapper marv_pycapnp.tests.test_wrapper_capnp:TestStruct>'

    builder.text = '\u03a8'
    assert wrapper.text == '\u03a8'
    assert isinstance(wrapper.text, str)

    builder.data = '\u03a8'.encode('utf-8')
    assert wrapper.data == '\u03a8'.encode('utf-8')
    assert isinstance(wrapper.data, bytes)

    builder.textList = ['\u03a8']
    assert wrapper.text_list == ['\u03a8']
    assert wrapper.text_list[:] == ['\u03a8']
    assert list(wrapper.text_list) == ['\u03a8']
    assert isinstance(wrapper.text_list[0], str)
    assert repr(wrapper.textList) == "['Ψ']"

    builder.dataList = ['\u03a8'.encode('utf-8')]
    assert wrapper.data_list == ['\u03a8'.encode('utf-8')]
    assert wrapper.data_list[:] == ['\u03a8'.encode('utf-8')]
    assert list(wrapper.data_list) == ['\u03a8'.encode('utf-8')]
    assert isinstance(wrapper.data_list[0], bytes)
    assert repr(wrapper.dataList) == "[b'\\xce\\xa8']"

    builder.textListInList = [['\u03a8'], ['\u03a8']]
    builder.dataListInList = [['\u03a8'.encode('utf-8')], ['\u03a8'.encode('utf-8')]]
    assert all(isinstance(x, str) for lst in wrapper.textListInList for x in lst)
    assert all(isinstance(x, bytes) for lst in wrapper.dataListInList for x in lst)

    nested = Wrapper.from_dict(
        schema=TestStruct,
        data={
            'text': '\u03a8',
            'data': '\u03a8'.encode('utf-8'),
            'textList': ['\u03a8'],
            'dataList': ['\u03a8'.encode('utf-8')],
            'textListInList': [['\u03a8'], ['\u03a8']],
            'dataListInList': [['\u03a8'.encode('utf-8')], ['\u03a8'.encode('utf-8')]],
        },
    )
    builder.nestedList = [nested._reader]  # pylint: disable=protected-access
    assert isinstance(wrapper.nested_list[0].text, str)
    assert isinstance(wrapper.nested_list[0].data, bytes)
    assert isinstance(wrapper.nested_list[0].textList[0], str)
    assert isinstance(wrapper.nested_list[0].dataList[0], bytes)

    builder.unionData = '\u03a8'.encode('utf-8')
    assert isinstance(wrapper.union_data, bytes)

    builder.unionText = '\u03a8'
    assert isinstance(wrapper.union_text, str)

    builder.union.data = '\u03a8'.encode('utf-8')
    assert isinstance(wrapper.union.data, bytes)

    builder.union.text = '\u03a8'
    assert isinstance(wrapper.union.text, str)

    builder.group.text = '\u03a8'
    assert isinstance(wrapper.group.text, str)

    builder.group.data = '\u03a8'.encode('utf-8')
    assert isinstance(wrapper.group.data, bytes)

    builder.enum = 'foo'
    assert wrapper.enum == 'foo'
    builder.enum = 'bar'
    assert wrapper.enum == 'bar'

    dct = wrapper.to_dict(which=True)
    assert dct == {
        'data': b'\xce\xa8',
        'dataList': [b'\xce\xa8'],
        'dataListInList': [[b'\xce\xa8'], [b'\xce\xa8']],
        'enum': 'bar',
        'group': {
            'data': b'\xce\xa8',
            'text': '\u03a8',
        },
        'nestedList': [
            {
                'data': b'\xce\xa8',
                'dataList': [b'\xce\xa8'],
                'dataListInList': [[b'\xce\xa8'], [b'\xce\xa8']],
                'enum': 'foo',
                'group': {
                    'data': b'',
                    'text': '',
                },
                'nestedList': [],
                'text': '\u03a8',
                'textList': ['\u03a8'],
                'textListInList': [['\u03a8'], ['\u03a8']],
                'union': {
                    'text': '',
                    '_which': 'text',
                },
                'unionText': '',
                '_which': 'unionText',
            },
        ],
        'text': '\u03a8',
        'textList': ['\u03a8'],
        'textListInList': [['\u03a8'], ['\u03a8']],
        'union': {
            'text': '\u03a8',
            '_which': 'text',
        },
        'unionText': '\u03a8',
        '_which': 'unionText',
    }

    # dict roundtrip
    dct = wrapper.to_dict()
    roundtrip = Wrapper.from_dict(TestStruct, dct)
    assert dct == roundtrip.to_dict()

    # pickle roundtrip
    data = pickle.dumps(wrapper, protocol=5)
    roundtrip = pickle.loads(data)
    assert wrapper.to_dict() == roundtrip.to_dict()

    with pytest.raises(RuntimeError):
        data = pickle.dumps(wrapper)