def StreamDef(field=None, shape=None, is_sparse=False, transforms=None,
              context=None, scp=None, mlf=None, broadcast=None,
              defines_mb_size=False):
    '''
    Build the configuration record of one stream for the builtin Deserializers.

    What a key means depends mildly on the deserializer that consumes it, and
    some keys have no meaning for some deserializers.

    Args:
        field (`str`, defaults to `None`): name of the stream

         * for CTFDeserializer the name is inside the CTF file
         * for ImageDeserializer the acceptable names are `image` or `label`
         * for HTKFeatureDeserializer and HTKMLFDeserializer only the default
           value of None is acceptable

        shape (`int` or `tuple`, defaults to `None`): dimensions of this
         stream. HTKFeatureDeserializer, HTKMLFDeserializer, and
         CTFDeserializer read data as flat arrays. If you need different
         shapes you can :func:`~cntk.ops.reshape` it later.
        is_sparse (`bool`, defaults to `False`): whether the provided data is
         sparse. Forced to `True` whenever `mlf` is provided.
        transforms (`list`, defaults to `None`): list of transforms to be
         applied by the Deserializer. Currently only ImageDeserializer
         supports transforms.
        context (`tuple`, defaults to `None`): left and right context to
         consider when reading in HTK data. Only supported by
         HTKFeatureDeserializer.
        scp (`str` or `list`, defaults to `None`): scp files for HTK data
        mlf (`str` or `list`, defaults to `None`): mlf files for HTK data
        broadcast (`bool`, defaults to `None`): whether the features in this
         stream should be broadcast to the whole sequence (useful in e.g.
         ivectors with HTK)
        defines_mb_size (`bool`, defaults to False): whether this stream
         defines the minibatch size.

    Returns:
        A `Record` holding `stream_alias`, `is_sparse` and `defines_mb_size`,
        plus one entry for every optional argument that is not `None`.
    '''
    settings = {
        'stream_alias': field,
        # supplying mlf files overrides whatever is_sparse the caller passed
        'is_sparse': True if mlf is not None else is_sparse,
    }

    # Optional keys are emitted only when the caller supplied a value.
    optional_entries = (
        ('dim', shape),
        ('transforms', transforms),
        ('context', context),
        ('scp', scp),
        ('mlf', mlf),
        ('broadcast', broadcast),
    )
    for key, value in optional_entries:
        if value is not None:
            settings[key] = value

    # normalise any truthy/falsy input to a real bool
    settings['defines_mb_size'] = bool(defines_mb_size)
    return Record(**settings)
def __init__(self,
             deserializers=None,
             randomize=True,
             randomization_window=DEFAULT_RANDOMIZATION_WINDOW_IN_CHUNKS,
             sample_based_randomization_window=False,
             epoch_size=INFINITELY_REPEAT,
             distributed_after=INFINITE_SAMPLES,
             multithreaded_deserializer=None,
             frame_mode=False,
             truncation_length=0):
    '''
    Create the minibatch source from one or more deserializers.

    All arguments are forwarded unchanged to `_ReaderConfig`; the source it
    produces is then adopted as the state of this instance, and its streams
    are exposed by name through `self.streams`.

    Args:
        deserializers: a single deserializer, or a `list`/`tuple` of them.
         Anything that is not a list or tuple is wrapped into a one-element
         list.
        randomize, randomization_window, sample_based_randomization_window,
        epoch_size, distributed_after, multithreaded_deserializer,
        frame_mode, truncation_length: passed through to `_ReaderConfig`
         under the same keyword names.
    '''
    # accept a bare deserializer as well as a sequence of them
    if isinstance(deserializers, (list, tuple)):
        deserializer_seq = deserializers
    else:
        deserializer_seq = [deserializers]

    config = _ReaderConfig(
        deserializers=deserializer_seq,
        randomize=randomize,
        randomization_window=randomization_window,
        sample_based_randomization_window=sample_based_randomization_window,
        epoch_size=epoch_size,
        distributed_after=distributed_after,
        multithreaded_deserializer=multithreaded_deserializer,
        frame_mode=frame_mode,
        truncation_length=truncation_length)

    # Adopt the state of the freshly built source: this instance shares the
    # very same attribute dictionary from here on.
    self.__dict__ = config.minibatch_source().__dict__

    # Expose every stream description under its name in a record `streams`.
    by_name = {info.m_name: info for info in self.stream_infos()}
    from ..variables import Record
    self.streams = Record(**by_name)
def _stream_infos(self, infos=None):
    '''
    Gather the stream descriptions of this object and publish them by name.

    The entries returned by `stream_infos()` are appended to `infos` (when a
    container is supplied) and stored as a `Record` in `self.streams`, keyed
    by each entry's `m_name`.

    Args:
        infos (defaults to `None`): optional output container offering
         `extend`; receives every entry returned by `stream_infos()`. When
         `None`, only `self.streams` is populated.

    Raises:
        ValueError: if `stream_infos()` returns no entries.
    '''
    inner = self.stream_infos()
    if len(inner) == 0:
        raise ValueError('Deserializer must provide at least one stream')

    # `infos` is declared optional, so it must not be dereferenced blindly:
    # calling with the default used to fail with AttributeError on None.
    if infos is not None:
        infos.extend(inner)

    self.streams = Record(**{si.m_name: si for si in inner})
def streams(self):
    '''
    Describes the streams 'this' minibatch source produces.

    The description is built on first access from `stream_infos()` and kept
    in `self._streams`; later calls return that same cached object.

    Returns:
        A `Record` with one field per stream, named after the stream's
        `m_name` and holding the corresponding entry of `stream_infos()`
        (instances of :class:`StreamInformation`)
    '''
    cached = self._streams
    if cached is not None:
        return cached

    # first access: index the stream descriptions by name and remember them
    by_name = {info.m_name: info for info in self.stream_infos()}
    self._streams = Record(**by_name)
    return self._streams
def __init__(self):
    '''
    Initialise the base class, then expose the streams reported by
    `stream_infos()` as a `Record` in `self.streams`, keyed by each
    stream's `m_name`.
    '''
    super(UserMinibatchSource, self).__init__()

    # index every stream description by its name
    by_name = {}
    for info in self.stream_infos():
        by_name[info.m_name] = info
    self.streams = Record(**by_name)
def __enter__(self):
    '''
    Push this context's overrides onto the chain of default overrides.

    A new `Record` carrying `self.scope`, a link to the previously active
    overrides (`_outer`) and all of `self.kwargs` becomes the class-level
    `_OptionsContextManager._current_default_overrides`.

    Returns:
        This context manager itself.
    '''
    scope = self.scope
    previous = _OptionsContextManager._current_default_overrides
    # the new record becomes the head of the linked list of scopes
    head = Record(_scope=scope, _outer=previous, **self.kwargs)
    _OptionsContextManager._current_default_overrides = head
    return self