def next_minibatch(self, minibatch_size_in_samples, input_map=None,
                   device=None, num_data_partitions=None, partition_index=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
         **Important:**
         Click :cntkwiki:`here <BrainScript-minibatchSize-and-Python-minibatch_size_in_samples-in-CNTK>`
         for a full description of this parameter.
        input_map (dict): mapping of :class:`~cntk.variables.Variable`
         to :class:`StreamInformation` which will be used to convert the
         returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
        num_data_partitions: Used for distributed training, indicates into
         how many partitions the source should split the data.
        partition_index (`int`, defaults to `None`): Used for distributed
         training, indicates data from which partition to take.

    Returns:
        cntk.io.MinibatchData: A mapping of :class:`StreamInformation` to
        :class:`MinibatchData` if `input_map` was not specified. Otherwise,
        the returned value will be a mapping of
        :class:`~cntk.variables.Variable` to :class:`MinibatchData`. When
        the maximum number of epochs/samples is exhausted, the return value
        is an empty dict.
    '''
    # Release the data copied for the previous minibatch before fetching
    # the next one.
    if self._last_mb_data is not None:
        self._last_mb_data.clear()

    device = use_default_device() if device is None else device
    num_data_partitions = 1 if num_data_partitions is None else num_data_partitions
    partition_index = 0 if partition_index is None else partition_index

    mb = super(MinibatchSource, self).get_next_minibatch(
        0, minibatch_size_in_samples, num_data_partitions,
        partition_index, device)

    # End of data, or caller wants the raw stream-info keyed result.
    if not mb or not input_map:
        return mb

    # We copy minibatch data here; keep a reference so it can be cleaned
    # up on the next call to next_minibatch.
    self._last_mb_data = {var: mb[info] for var, info in input_map.items()}
    return self._last_mb_data
def __init__(self, shape=None, init=None, dtype=None, device=None, name=''):
    '''
    Construct a Parameter.

    Depending on ``init``, the parameter is created either from a concrete
    value (scalar, list or NumPy array) or from an initializer object that
    the core library evaluates lazily.

    Args:
        shape (tuple, optional): shape of the parameter
        init (scalar, list, NumPy array or initializer, optional): initial
         value; defaults to 0
        dtype (NumPy dtype, optional): data type; if omitted it is inferred
         from ``init`` (np.float32 for non-array initializers)
        device (DeviceDescriptor, optional): device to place the parameter on
        name (str): name of the parameter
    '''
    if not device:
        device = use_default_device()

    if dtype is not None:
        # An explicit dtype wins: convert a mismatching NumPy initializer.
        if isinstance(init, np.ndarray) and dtype != init.dtype:
            init = np.array(init, dtype=dtype)
    else:
        # Infer dtype (and a scalar shape) from the initial value.
        if np.isscalar(init) and not shape:
            shape = ()
        dtype = init.dtype if isinstance(init, np.ndarray) else np.float32

    if init is None:
        init = 0

    if isinstance(init, (np.ndarray, list, float, int)):
        # Concrete initial value: wrap it in an NDArrayView.
        super(Parameter, self).__init__(
            sanitize_value(shape, init, dtype, device), name)
    else:
        # Initializer object: the core library fills in the values.
        super(Parameter, self).__init__(
            sanitize_shape(shape), sanitize_dtype_cntk(dtype),
            init, device, name)
def __init__(self, value=None, shape=None, dtype=None, device=None, name=''):
    '''
    Construct a Constant from a scalar or an array-like value.

    Args:
        value (scalar or NumPy array, optional): value of the constant
        shape (tuple, optional): shape of the constant; inferred as () for
         scalar/array values when omitted
        dtype (NumPy dtype, optional): data type; if omitted it is inferred
         from ``value`` (np.float32 for non-array values)
        device (DeviceDescriptor, optional): device to place the constant on
        name (str): name of the constant
    '''
    if not device:
        device = use_default_device()

    if (np.isscalar(value) or isinstance(value, np.ndarray)) and not shape:
        shape = ()

    if dtype is not None:
        # An explicit dtype wins: convert a mismatching NumPy value.
        if isinstance(value, np.ndarray) and dtype != value.dtype:
            value = np.array(value, dtype=dtype)
    else:
        dtype = value.dtype if isinstance(value, np.ndarray) else np.float32

    if np.isscalar(value):
        super(Constant, self).__init__(sanitize_shape(shape),
                                       sanitize_dtype_cntk(dtype),
                                       value, device, name)
    else:
        super(Constant, self).__init__(
            sanitize_value(shape, value, dtype, device), name)
def get_next_minibatch(self, minibatch_size_in_samples,
                       minibatch_size_in_sequences=None, device=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; a value of 0 means unspecified. If the size
    is given both in #sequences and #samples, the smaller of the two is
    taken. An empty map is returned when the MinibatchSource has no more
    data to return.

    Args:
        minibatch_size_in_samples (`int`): number of samples to retrieve
         for the next minibatch. Must be > 0.
        minibatch_size_in_sequences (`int`, defaults to `None`): number of
         sequences to retrieve for the next minibatch. Must be > 0.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor

    Returns:
        :class:`MinibatchData`
    '''
    device = use_default_device() if device is None else device

    parent = super(MinibatchSource, self)
    if minibatch_size_in_sequences is None:
        return parent.get_next_minibatch(minibatch_size_in_samples, device)
    return parent.get_next_minibatch(minibatch_size_in_samples,
                                     minibatch_size_in_sequences, device)
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Args:
        batch (list (of lists, if sequence) of index data): batch input data
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed
        to the forward or eval function.

    Raises:
        ValueError: if ``dtype`` is neither ``np.float32`` nor ``np.float64``
    '''
    if device is None:
        device = use_default_device()

    if dtype in [np.float32, None]:
        value = cntk_py.Value.create_one_hot_float(num_classes, batch, device, False)
    elif dtype == np.float64:
        value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
    else:
        # Previously an unsupported dtype fell through and raised an
        # UnboundLocalError on `value`; fail with a clear message instead.
        raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
    return value
def get_next_minibatch(self, minibatch_size_in_samples,
                       minibatch_size_in_sequences=None, device=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; a value of 0 means unspecified. If the size
    is given both in #sequences and #samples, the smaller of the two wins.
    An empty map is returned when the MinibatchSource has no more data to
    return.

    Args:
        minibatch_size_in_samples (`int`): number of samples to retrieve
         for the next minibatch. Must be > 0.
        minibatch_size_in_sequences (`int`, defaults to `None`): number of
         sequences to retrieve for the next minibatch. Must be > 0.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor

    Returns:
        :class:`MinibatchData`
    '''
    if device is None:
        device = use_default_device()

    base = super(MinibatchSource, self)
    # Only pass the sequence count through when the caller provided one.
    if minibatch_size_in_sequences is None:
        return base.get_next_minibatch(minibatch_size_in_samples, device)
    return base.get_next_minibatch(minibatch_size_in_samples,
                                   minibatch_size_in_sequences, device)
def _create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
    '''Allocate a dense NDArrayView of ``shape``/``data_type`` on ``device``.'''
    sanitized_shape = sanitize_shape(shape)
    if device is None:
        device = use_default_device()
    # FIXME only dense supported so far
    return cntk_py.NDArrayView(data_type, cntk_py.StorageFormat_Dense,
                               sanitized_shape, device)
def next_minibatch(self, minibatch_size_in_samples=None,
                   minibatch_size_in_sequences=None, input_map=None,
                   device=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
        minibatch_size_in_sequences (int, defaults to `None`): number of
         sequences to retrieve for the next minibatch. Must be > 0.
        input_map (dict): mapping of :class:`~cntk.ops.variables.Variable`
         to :class:`StreamInformation` which will be used to convert the
         returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor

    Returns:
        A mapping of :class:`StreamInformation` to :class:`MinibatchData` if
        ``input_map`` was not specified. Otherwise, the returned value will
        be a mapping of :class:`~cntk.ops.variables.Variable` to
        :class:`MinibatchData`. When the source is exhausted, an empty dict
        is returned.

    Raises:
        ValueError: if neither size argument is given
    '''
    if device is None:
        device = use_default_device()

    if minibatch_size_in_samples is None and \
            minibatch_size_in_sequences is None:
        raise ValueError(
            'you have to specify at least one of '
            'minibatch_size_in_samples or minibatch_size_in_sequences')

    if minibatch_size_in_sequences is None:
        mb = super(MinibatchSource, self).get_next_minibatch(
            minibatch_size_in_samples, device)
    else:
        if minibatch_size_in_samples is None:
            minibatch_size_in_samples = 0
        mb = super(MinibatchSource, self).get_next_minibatch(
            minibatch_size_in_samples, minibatch_size_in_sequences, device)

    if input_map:
        if not mb:
            # Bug fix: return an empty dict (not None) when the source is
            # exhausted, as the docstring promises ("an empty map is
            # returned") and as sibling implementations do.
            return {}
        return {key: mb[value] for (key, value) in input_map.items()}
    return mb
def next_minibatch(self, minibatch_size_in_samples, input_map=None,
                   device=None, num_data_partitions=None, partition_index=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
         **Important:**
         Click `here <https://github.com/Microsoft/CNTK/wiki/BrainScript-minibatchSize-and-Python-minibatch_size_in_samples-in-CNTK>`__
         for a full description of this parameter.
        input_map (dict): mapping of :class:`~cntk.variables.Variable`
         to :class:`~cntk.cntk_py.StreamInformation` which will be used to
         convert the returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
        num_data_partitions: Used for distributed training, indicates into
         how many partitions the source should split the data.
        partition_index (`int`, defaults to `None`): Used for distributed
         training, indicates data from which partition to take.

    Returns:
        cntk.io.MinibatchData: A mapping of
        :class:`~cntk.cntk_py.StreamInformation` to :class:`MinibatchData`
        if `input_map` was not specified. Otherwise, the returned value will
        be a mapping of :class:`~cntk.variables.Variable` to
        :class:`MinibatchData`.
    '''
    device = use_default_device() if device is None else device
    num_data_partitions = 1 if num_data_partitions is None else num_data_partitions
    partition_index = 0 if partition_index is None else partition_index

    mb = super(MinibatchSource, self).get_next_minibatch(
        0, minibatch_size_in_samples, num_data_partitions,
        partition_index, device)

    # Pass empty minibatches (end of data) and unmapped results through.
    if not mb or not input_map:
        return mb

    return {var: mb[stream_info] for var, stream_info in input_map.items()}
def next_minibatch(self, minibatch_size_in_samples=None,
                   minibatch_size_in_sequences=None, input_map=None,
                   device=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
        minibatch_size_in_sequences (int, defaults to `None`): number of
         sequences to retrieve for the next minibatch. Must be > 0.
        input_map (dict): mapping of :class:`~cntk.ops.variables.Variable`
         to :class:`StreamInformation` which will be used to convert the
         returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor

    Returns:
        A mapping of :class:`StreamInformation` to :class:`MinibatchData` if
        ``input_map`` was not specified. Otherwise, the returned value will
        be a mapping of :class:`~cntk.ops.variables.Variable` to
        :class:`MinibatchData`. When the source is exhausted, an empty dict
        is returned.

    Raises:
        ValueError: if neither size argument is given
    '''
    if device is None:
        device = use_default_device()

    if minibatch_size_in_samples is None and \
            minibatch_size_in_sequences is None:
        raise ValueError('you have to specify at least one of '
                'minibatch_size_in_samples or minibatch_size_in_sequences')

    if minibatch_size_in_sequences is None:
        mb = super(MinibatchSource, self).get_next_minibatch(
            minibatch_size_in_samples, device)
    else:
        if minibatch_size_in_samples is None:
            minibatch_size_in_samples = 0
        mb = super(MinibatchSource, self).get_next_minibatch(
            minibatch_size_in_samples,
            minibatch_size_in_sequences, device)

    if input_map:
        if not mb:
            # Bug fix: return an empty dict (not None) when the source is
            # exhausted, as the docstring promises ("an empty map is
            # returned") and as sibling implementations do.
            return {}
        return {key: mb[value] for (key, value) in input_map.items()}
    return mb
def next_minibatch(self, minibatch_size_in_samples, input_map=None,
                   device=None, num_data_partitions=None, partition_index=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
        input_map (dict): mapping of :class:`~cntk.ops.variables.Variable`
         to :class:`StreamInformation` which will be used to convert the
         returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
        num_data_partitions: Used for distributed training, indicates into
         how many partitions the source should split the data.
        partition_index: Used for distributed training, indicates data from
         which partition to take.

    Returns:
        a mapping of :class:`StreamInformation` to :class:`MinibatchData` if
        ``input_map`` was not specified. Otherwise, the returned value will
        be a mapping of :class:`~cntk.ops.variables.Variable` to
        :class:`MinibatchData`.
    '''
    device = use_default_device() if device is None else device
    num_data_partitions = 1 if num_data_partitions is None else num_data_partitions
    partition_index = 0 if partition_index is None else partition_index

    mb = super(MinibatchSource, self).get_next_minibatch(
        0, minibatch_size_in_samples, num_data_partitions,
        partition_index, device)

    if not input_map:
        return mb
    if not mb:
        # Source exhausted: honor the "empty map" contract.
        return {}
    return {var: mb[stream_info] for var, stream_info in input_map.items()}
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (list of lists of integers): batch input data of indices
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed
        to the forward or eval function.

    Raises:
        ValueError: on malformed input, non-integer index data, or an
         unsupported ``dtype``
    '''
    if device is None:
        device = use_default_device()

    if isinstance(batch, np.ndarray):
        # tolist() converts NumPy integer scalars to Python ints.
        batch = batch.tolist()

    try:
        data_type = type(batch[0][0])
    except (TypeError, IndexError):
        # Narrowed from a bare `except:` so that e.g. KeyboardInterrupt is
        # not swallowed; these are the errors raised for non-nested or
        # empty input.
        raise ValueError('input must be a list of list of integers')

    if data_type != int:
        raise ValueError('supplied data to one_hot() must be of type integer'
                         ' and not "%s" since it is index data.' % data_type)

    if dtype in [np.float32, None]:
        value = cntk_py.Value.create_one_hot_float(num_classes, batch, device, False)
    elif dtype == np.float64:
        value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
    else:
        # Previously an unsupported dtype fell through and raised an
        # UnboundLocalError on `value`.
        raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
    return value
def next_minibatch(self, minibatch_size_in_samples, input_map=None,
                   device=None, num_data_partitions=None, partition_index=None):
    '''
    Reads a minibatch that contains data for all input streams. The
    minibatch size is specified in terms of #samples and/or #sequences for
    the primary input stream; value of 0 for #samples/#sequences means
    unspecified. In case the size is specified in terms of both #sequences
    and #samples, the smaller of the 2 is taken. An empty map is returned
    when the MinibatchSource has no more data to return.

    Args:
        minibatch_size_in_samples (int): number of samples to retrieve for
         the next minibatch. Must be > 0.
         **Important:**
         Click `here <https://github.com/Microsoft/CNTK/wiki/BrainScript-epochSize-and-Python-epoch_size-in-CNTK>`_
         for a full description of this parameter.
        input_map (dict): mapping of :class:`~cntk.ops.variables.Variable`
         to :class:`~cntk.cntk_py.StreamInformation` which will be used to
         convert the returned data.
        device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
        num_data_partitions: Used for distributed training, indicates into
         how many partitions the source should split the data.
        partition_index (`int`, defaults to `None`): Used for distributed
         training, indicates data from which partition to take.

    Returns:
        cntk.io.MinibatchData: A mapping of
        :class:`~cntk.cntk_py.StreamInformation` to :class:`MinibatchData`
        if `input_map` was not specified. Otherwise, the returned value will
        be a mapping of :class:`~cntk.ops.variables.Variable` to
        :class:`MinibatchData`.
    '''
    device = use_default_device() if device is None else device
    num_data_partitions = 1 if num_data_partitions is None else num_data_partitions
    partition_index = 0 if partition_index is None else partition_index

    mb = super(MinibatchSource, self).get_next_minibatch(
        0, minibatch_size_in_samples, num_data_partitions,
        partition_index, device)

    if not input_map:
        return mb
    if not mb:
        # Source exhausted: honor the "empty map" contract.
        return {}
    return {var: mb[stream_info] for var, stream_info in input_map.items()}
def load_model(filename, device=None):
    '''
    Load the network in ``filename``, that has been saved using
    `:func:save_model`.

    Args:
        filename (str): filename to load the model from
        device (:class:`~cntk.DeviceDescriptor`, default is the default
         device): instance of DeviceDescriptor

    Returns:
        root node
    '''
    target_device = device if device else use_default_device()
    return cntk_py.Function.load_model(filename, target_device)
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`~cntk.core.Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (list of lists of integers): batch input data of indices
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed
        to the forward or eval function.

    Raises:
        ValueError: on malformed input, non-integer index data, or an
         unsupported ``dtype``
    '''
    if device is None:
        device = use_default_device()

    if isinstance(batch, np.ndarray):
        # tolist() converts NumPy integer scalars to Python ints.
        batch = batch.tolist()

    try:
        data_type = type(batch[0][0])
    except (TypeError, IndexError):
        # Narrowed from a bare `except:` so that e.g. KeyboardInterrupt is
        # not swallowed; these are the errors raised for non-nested or
        # empty input.
        raise ValueError('input must be a list of list of integers')

    if data_type != int:
        raise ValueError('supplied data to one_hot() must be of type integer'
                ' and not "%s" since it is index data.'%data_type)

    if dtype in [np.float32, None]:
        value = cntk_py.Value.create_one_hot_float(num_classes, batch, device, False)
    elif dtype == np.float64:
        value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
    else:
        # Previously an unsupported dtype fell through and raised an
        # UnboundLocalError on `value`.
        raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
    return value
def __init__(self, shape=None, dtype=None, data=None, mask=None, device=None):
    '''
    Construct a Value either from an explicit ``shape``/``dtype`` (a fresh
    dense view is allocated) or from existing ``data``.

    Args:
        shape (tuple, optional): shape of a new NDArrayView to allocate
        dtype (NumPy dtype, optional): data type for the new view
        data (NumPy array or NDArrayView, optional): existing data to wrap
        mask (NDMask, optional): sequence mask
        device (DeviceDescriptor, optional): target device; defaults to the
         globally default device

    Raises:
        ValueError: if neither ``shape``/``dtype`` nor ``data`` is given
    '''
    if device is None:
        device = use_default_device()

    if shape and dtype:
        ndav = _create_NDArrayView(shape, dtype, device)
    elif data is not None:
        # Bug fix: test `data is not None` instead of `elif data:` —
        # bool() on a NumPy array with more than one element raises
        # "truth value of an array is ambiguous", and an empty array was
        # silently skipped, leaving `ndav` unbound.
        if isinstance(data, np.ndarray):
            ndav = _create_NDArrayView_from_NumPy(data, device)
        else:
            ndav = data
    else:
        # Previously fell through and raised UnboundLocalError on `ndav`.
        raise ValueError('either "shape" and "dtype" or "data" must be given')

    if mask:
        super(Value, self).__init__(ndav, mask)
    else:
        super(Value, self).__init__(ndav)
def load_model(data_type, filename, device=None):
    '''
    Load the network in ``filename``, that has been saved using
    `:func:save_model`.

    Args:
        data_type ('float' or 'double', or NumPy type): data type of the
         operation
        filename (`str`): filename to load the model from
        device (:class:`cntk.device.DeviceDescriptor`, default to default
         device): instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    data_type = sanitize_dtype_cntk(data_type)
    if not device:
        device = use_default_device()
    # NOTE(review): `device` is resolved above but never passed to
    # load_legacy_model, so the parameter has no effect here — confirm
    # whether the legacy loader accepts a device argument or the
    # resolution can be dropped.
    return cntk_py.load_legacy_model(data_type, filename)
def load_model(data_type, filename, device=None):
    '''
    Load the network in ``filename``, that has been saved using
    `:func:save_model`.

    Args:
        data_type ('float' or 'double', or NumPy type): data type of the
         operation
        filename (`str`): filename to load the model from
        device (:class:`cntk.DeviceDescriptor`, default to default device):
         instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    data_type = sanitize_dtype_cntk(data_type)
    if not device:
        device = use_default_device()
    # NOTE(review): `device` is resolved above but never passed to
    # load_legacy_model, so the parameter has no effect here — confirm
    # whether the legacy loader accepts a device argument or the
    # resolution can be dropped.
    return cntk_py.load_legacy_model(data_type, filename)
def __init__(self, shape=None, dtype=None, batch=None, seq_starts=None, device=None):
    '''
    Construct a Value either from an explicit ``shape``/``dtype`` (a fresh
    dense view is allocated) or from an existing ``batch``.

    Args:
        shape (tuple, optional): shape of a new NDArrayView to allocate
        dtype (NumPy dtype, optional): data type for the new view
        batch (NumPy array or NDArrayView, optional): existing data to wrap
        seq_starts (list of bool, optional): sequence-begin markers
        device (DeviceDescriptor, optional): target device; defaults to the
         globally default device

    Raises:
        ValueError: if neither ``shape``/``dtype`` nor ``batch`` is given
    '''
    if device is None:
        device = use_default_device()

    if shape and dtype:
        # FIXME is this needed?
        ndav = _create_NDArrayView(shape, dtype, device)
    elif batch is not None:
        # Bug fix: test `batch is not None` instead of `elif batch:` —
        # bool() on a NumPy array with more than one element raises
        # "truth value of an array is ambiguous", and an empty array was
        # silently skipped, leaving `ndav` unbound.
        if isinstance(batch, np.ndarray):
            ndav = _create_NDArrayView_from_NumPy(batch, device)
        else:
            ndav = batch
    else:
        # Previously fell through and raised UnboundLocalError on `ndav`.
        raise ValueError('either "shape" and "dtype" or "batch" must be given')

    if seq_starts:
        super(Value, self).__init__(ndav, seq_starts)
    else:
        super(Value, self).__init__(ndav)
def load_model(filename, dtype=np.float32, device=None):
    '''
    Load the network in ``filename``, that has been saved using
    `:func:save_model`.

    Args:
        filename (str): filename to load the model from
        dtype ('float', 'double', or NumPy type, default np.float32): data
         type of the operation
        device (:class:`~cntk.DeviceDescriptor`, default is the default
         device): instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    cntk_dtype = sanitize_dtype_cntk(dtype)
    target_device = device if device else use_default_device()
    return cntk_py.Function.load_model(cntk_dtype, filename, target_device)
def load_model(filename, dtype=np.float32, device=None):
    '''
    Load the network in ``filename``, that has been saved using
    `:func:save_model`.

    Args:
        filename (`str`): filename to load the model from
        dtype ('float', 'double', or NumPy type, default ``np.float32``):
         data type of the operation
        device (:class:`cntk.DeviceDescriptor`, default is the default
         device): instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    # Resolve the dtype first, then fall back to the default device.
    resolved_dtype = sanitize_dtype_cntk(dtype)
    if not device:
        device = use_default_device()
    return cntk_py.Function.load_model(resolved_dtype, filename, device)
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors. Additionally, a SciPy CSR matrix can be
    obtained by calling :meth:`~cntk.utils.Value.to_csr`.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (NumPy array or list (of lists, if sequence) of index data):
         batch input data
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed
        to the forward or eval function.

    Raises:
        ValueError: if ``dtype`` is neither ``np.float32`` nor ``np.float64``
    '''
    if device is None:
        device = use_default_device()

    if isinstance(batch, np.ndarray):
        batch = batch.tolist()

    if dtype in [np.float32, None]:
        value = cntk_py.Value.create_one_hot_float(num_classes, batch, device, False)
    elif dtype == np.float64:
        value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
    else:
        # Previously an unsupported dtype fell through and raised an
        # UnboundLocalError on `value`; fail with a clear message instead.
        raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
    return value
def sanitize_batch(var, batch, seq_starts=None, device=None):
    '''
    Convert to :class:`~cntk.core.Value`.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): input variable into
         which ``batch`` is passed
        batch: batch input for `var`. It can be
          * a single NumPy array denoting the full minibatch
          * a list of NumPy arrays or SciPy sparse CSR matrices each
            representing a sequence
          * a :class:`~cntk.core.Value` object (e.g. returned by
            :func:`one_hot`)
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list
         of Booleans one for each sequence in the batch that tell whether a
         sequence is a new sequence (`True`) or a continuation of the
         sequence in the same slot of the previous minibatch (`False`)
        device (:class:`~cntk.device.DeviceDescriptor`, default None):
         device this value should be put on

    Returns:
        :class:`~cntk.core.Value`: converted batch that can be passed to
        the core API
    '''
    # Already a Value: pass it through untouched.
    if isinstance(batch, cntk_py.Value):
        if seq_starts is not None:
            raise ValueError('for directly passed Value objects sequence '
                             'starts cannot be used yet.')
        return batch

    # Sequence-begin markers only make sense with a sequence axis.
    if seq_starts and len(var.dynamic_axes) <= 1:
        raise ValueError('you specified sequence begin markers, but your '
                         'input_variable does not contain a sequence axis.')

    device = use_default_device() if device is None else device

    from .. import Value
    return Value.create(var, batch, seq_starts, device)
def create(var, batch, seq_starts=None, device=None, read_only=False):
    '''
    Creates a :class:`Value` object.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): input variable into
         which ``batch`` is passed
        batch: batch input. It can be
          * a single NumPy array denoting the full minibatch
          * a list of NumPy arrays or SciPy sparse CSR matrices
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list
         of Booleans that tell whether a sequence is a new sequence (`True`)
         or a continuation of the sequence in the same slot of the previous
         minibatch (`False`)
        device (:class:`~cntk.device.DeviceDescriptor`, default None):
         device this value should be put on
        read_only (bool, default False): whether the data is read only

    Returns:
        :class:`Value` object.

    Raises:
        ValueError: for object-dtype arrays, non-list batches, unsupported
         sample types or dtypes, non-contiguous NumPy data, or non-CSR
         sparse input
    '''
    if isinstance(batch, np.ndarray):
        # The outermost axis has to be Python list. If the user passes a
        # full minibatch as one NumPy array, we have to convert it.
        if batch.dtype == object:
            raise ValueError('dtype object is not supported. If this is a batch '
                    'of sequences, you need to pass them as a pure-Python list '
                    'of NumPy arrays')

        # FIXME if not seq_starts: directly pass it to Value constructor
        batch = list(batch)

    if not isinstance(batch, list):
        raise ValueError('batch has to be a list of NumPy arrays or '
                'SciPy CSR matrices')

    list_of_ndavs = []

    # NDArrayViews are all created on CPU. The Value object later then will
    # move it to the requested device.
    cpu_dev = cpu()
    for sample in batch:
        if isinstance(sample, list):
            # Pure-Python sequence: convert to the variable's dtype.
            sample = np.asarray(sample, dtype=var.dtype)
            if sample.dtype != var.dtype:
                raise ValueError('could not convert sample data to '
                        'NumPy array')

        if not (isinstance(sample, np.ndarray) or sparse.issparse(sample)):
            raise ValueError('sample type "%s" is not supported. Please '
                    'provide the data as a Python list of NumPy arrays '
                    'or Scipy CSR matrices.'%type(sample))

        # Integer index data is implicitly cast to the variable's dtype.
        if np.issubdtype(sample.dtype, int):
            sample = sample.astype(var.dtype)
        elif sample.dtype not in (np.float32, np.float64):
            raise ValueError('only integer, float32 and float64 are supported, '
                    'you gave %s'%sample.dtype)

        if isinstance(sample, np.ndarray):
            # Dense samples must be C contiguous so the view can alias the
            # underlying buffer directly.
            if not _is_c_contiguous(sample):
                raise ValueError('supplied data is not C contiguous; use '
                        'np.ascontiguousarray (slow) or rearrange your data/computation')
            ndav = _create_NDArrayView_from_NumPy(sample, cpu_dev)

        elif sparse.issparse(sample):
            if not sparse.isspmatrix_csr(sample):
                raise ValueError("only CSR is supported as of now. Please "
                        "convert your data using 'tocsr()'")

            # Build a sparse view directly from the CSR triplet arrays.
            ndav = cntk_py.NDArrayView(sample.shape, sample.data,
                    sample.indptr, sample.indices, cpu_dev, False)

        list_of_ndavs.append(ndav)

    return cntk_py.Value_create(
            _as_tuple(var.shape), list_of_ndavs,
            seq_starts or [],
            device or use_default_device(),
            read_only)
def _create_NDArrayView_from_NumPy(nd, device=None):
    '''Wrap the NumPy array ``nd`` in an NDArrayView on ``device``.'''
    target = device if device is not None else use_default_device()
    return cntk_py.NDArrayView(nd, target, False)
def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
    '''
    Convert to :class:`Value` with ``dtype``. If the samples in
    ``batch`` have different sequence lengths, pad them to max sequence
    length and create a mask.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): variable node for which
         the ``batch`` is meant
        batch: batch input for `var`. It can be a pure Python structure
         (list of lists, ...), a combination of lists of NumPy arrays or
         SciPy sparse CSR matrices. Alternatively, it can also be the output
         of :func:`one_hot`.
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list
         of Booleans that tell whether a sequence is a new sequence (`True`)
         or a continuation of the sequence in the same slot of the previous
         minibatch (`False`)
        dtype (NumPy dtype or None): target data type; inferred from ``var``
         when None
        device (:class:`~cntk.device.DeviceDescriptor`, default None):
         device this value should be put on

    Returns:
        :class:`Value`: converted batch that can be passed to the core API
    '''
    # Already converted: nothing to do.
    if isinstance(batch, cntk_py.Value):
        return batch

    if isinstance(batch, list):
        if len(batch) == 0:
            raise ValueError('batch is empty')

    if isinstance(batch, np.ndarray) and batch.dtype == object:
        raise ValueError('dtype object is not supported. If this is a batch '
                'of sequences, you need to pass them as a pure-Python list '
                'of NumPy arrays')

    # We need to figure out whether the data has a sequence axis. Note that
    # it is not enough to check whether the variable's dynamic axes include the
    # sequence axis, because the sequence axis might be omitted in the data if
    # it is not needed (CNTK core would then take care of this).
    batch_has_seq = _has_seq_dim(var, batch)

    is_dense = _is_dense(batch)

    if batch_has_seq or seq_starts:
        # Per-sequence lengths, needed both for padding and for the mask.
        if isinstance(batch[0], list):
            seq_lens = [len(seq) for seq in batch]
        else:
            seq_lens = [seq.shape[0] for seq in batch]

        max_seq_len = max(seq_lens)

        # If the input is a list of lists of dense values, all of the same
        # length, we convert it into a NumPy array.
        if is_dense and len(set(seq_lens)) == 1:
            batch_has_seq = False
            batch = np.asarray(batch, dtype=var.dtype)

    if dtype is None:
        dtype = get_data_type(var)

    if device is None:
        device = use_default_device()

    # batch is now either a dense input that requires a mask, or it is sparse
    if batch_has_seq or seq_starts:
        mask = cntk_py.NDMask((len(batch), max_seq_len),
                device or use_default_device())
        for idx, seq_len in enumerate(seq_lens):
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            # The first parameter is provided as a vector of ints, and thus
            # won't be automatically reversed to col-major, because of which we
            # provide it as such.
            # The second parameter is specifying the rectangle of the mask that
            # is invalid. As C++ is taking an NDShape, and we reverse the shape
            # in the SWIG layer, we provide it here as row-major.
            mask.invalidate_section((seq_len, idx),
                    (1, cntk_py.InferredDimension))
    else:
        mask = None

    if is_dense:
        if batch_has_seq:
            batch = _pad_dense_to_max_len(var, batch, max_seq_len)
        if not isinstance(batch, np.ndarray):
            batch = np.asarray(batch)
        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    if isinstance(batch, np.ndarray):
        # Integer index data is cast to the variable's float dtype.
        if np.issubdtype(batch.dtype, int):
            batch = batch.astype(var.dtype)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')

        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    # There are three possibilities of providing sparse batches:
    # 1. batch is given as one big sparse array
    batch_is_sparse = sparse.issparse(batch)
    if batch_is_sparse:
        sparse_tmp = batch
    else:
        # 2. batch is given as a list of sparse arrays, each of which is a full
        # sequence
        batch_has_sparse_sequences = batch_is_sparse or sparse.issparse(batch[0])
        if batch_has_sparse_sequences:
            sparse_tmp = batch[0]
        else:
            # 3. batch is given as a list of lists containing the sparse sequence
            # elements
            batch_has_sparse_elements = batch_has_sparse_sequences or \
                    sparse.issparse(batch[0][0])
            if batch_has_sparse_elements:
                sparse_tmp = batch[0][0]

    # NOTE(review): if none of the three sparse forms matched,
    # `sparse_tmp` is unbound here and the next line raises NameError
    # rather than the intended "batch input not understood" — confirm
    # whether non-sparse input can reach this point.
    if not sparse.isspmatrix_csr(sparse_tmp):
        raise ValueError("only CSR is supported as of now. Please "
                "convert your data using 'batch.tocsr()'")

    if batch_is_sparse or batch_has_sparse_sequences or \
            batch_has_sparse_elements:

        batch_shape = batch.shape if hasattr(batch, 'shape') else (len(batch),)
        sample_shape = var.shape

        if not batch_is_sparse:
            # batch is not one big sparse matrix, but a list of them (or a list
            # of lists of them), so we have to create one. Two possibilities:
            # 1. Batch has sequence axis: only 1d sparse vectors are allowed.
            # 2. Ohterwise, 1d or 2d sparse tensors are allowed
            if batch_has_seq:
                shape = batch[0][0].shape
                if not (len(shape)==1 or len(shape)==2 and shape[0]==1):
                    raise ValueError('only 1D sparse vectors are supported in '
                            ' sequence data, you gave shape %s'%str(shape))

                # Pad and stack the sparse vectors.
                if batch_has_seq:
                    batch = _pad_sparse_seq_to_max_len(batch, max_seq_len)
                    batch_shape += (max_seq_len,)

                # We are actually 1D. If rank==2, then the first dim is 1.
                sample_shape = sample_shape[-1]
            else:
                sample_shape = batch[0][0].shape
                if len(sample_shape) not in [1,2]:
                    raise ValueError('only 1D or 2D sparse vectors are supported')

            # Vertically stack sequences/samples
            batch = sparse.vstack(batch, format='csr')

        batch_shape += _as_tuple(sample_shape)

        ndav = cntk_py.NDArrayView(batch_shape, batch.data.astype(var.dtype),
                batch.indptr, batch.indices, device, False)

        return Value(data=ndav, mask=mask)

    else:
        raise ValueError('batch input not understood')
def sanitize_batch(var, batch, seq_starts=None, data_type=None, device=None):
    '''
    Convert to :class:`cntk.cntk_py.Value` with ``data_type``. If the samples in
    ``batch`` have different sequence lengths, pad them to max sequence length
    and create a mask.

    Args:
        var (:class:`cntk.ops.variables.Variable`): variable node for which the
         ``batch`` is meant
        batch (`list` of NumPy arrays): input
        seq_starts (`list` of `bool` or `None`): if `None`, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the previous one (`False`)
        data_type (NumPy dtype or `None`): target dtype; inferred from ``var``
         when `None`
        device (`DeviceDescriptor` or `None`): device the value is created on;
         defaults to the globally selected device

    Returns:
        :class:`cntk.cntk_py.Value`: converted batch
    '''
    from ..cntk_py import Value
    if isinstance(batch, Value):
        # Already converted - nothing to do.
        return batch

    use_mask = False
    if isinstance(batch, np.ndarray):
        # BUGFIX: np.int was removed in NumPy 1.24; compare against the
        # builtin int (the alias resolved to it anyway).
        if batch.dtype == int:
            batch = batch.astype(np.float32)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')
    elif isinstance(batch, list):
        if is_tensor_list(batch):
            # A mask is only needed when there are dynamic axes beyond the
            # batch axis (i.e. a sequence axis).
            use_mask = len(var.dynamic_axes) > 1

    if device is None:
        device = use_default_device()

    if not use_mask and seq_starts is not None:
        raise ValueError('specification of individual sequence begins does not'
                         ' make sense when not using the sequence axis')

    # Use the mask, if we have additional dynamic axes besides the batch axis
    if use_mask:
        seq_lens = [len(seq) for seq in batch]

        try:
            num_seq = len(batch)
        except TypeError:
            raise ValueError('expected an object of type Value or a NumPy ' +
                             'array and not "%s"' % type(batch))

        from cntk.cntk_py import NDMask
        mask = NDMask((max(seq_lens), num_seq), device)
        for idx, seq_len in enumerate(seq_lens):
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            # Mark everything past the actual sequence length as invalid.
            mask.invalidate_section((seq_len, idx),
                                    (cntk_py.InferredDimension, 1))

        # Then we pad the batch to rectangular shape
        if isinstance(batch, list):
            if len(batch) == 0:
                raise ValueError('batch is empty')

            batch = pad_to_dense(batch)

    # If it still is not an NumPy array, try brute force...
    if not isinstance(batch, np.ndarray):
        if data_type is None:
            data_type = get_data_type(var)
        batch = np.asarray(batch, dtype=data_type)

    # Maybe a NumPy dtype was given, but with lower accuracy than float32, then
    # convert it to float32
    if np.issubdtype(batch.dtype, int):
        batch = batch.astype(np.float32)

    # BUGFIX: the original referenced an undefined name `cntk_shape` here,
    # which raised NameError on every call that reached this point. The check
    # guards against a 0-d (scalar) batch.
    if len(batch.shape) == 0:
        raise ValueError('values should be an array of input samples')

    ndav = create_NDArrayView_from_NumPy(batch, device)

    if use_mask:
        value = Value(ndav, mask)
    else:
        value = Value(ndav)

    return value
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size, max_epochs, cntk_lr_per_image,
                         l2_reg_weight, momentum_time_constant, base_path, boSkipTraining=False, debug_output=False,
                         tensorboardLogDir=None):
    '''
    Build and (optionally) train a Fast R-CNN model.

    Args:
        image_height, image_width (int): input image dimensions
        num_classes (int): number of object classes (incl. background)
        num_rois (int): number of regions of interest per image
        mb_size (int): minibatch size in samples
        max_epochs (int): number of training epochs
        cntk_lr_per_image (list of float): per-image learning rates, one per epoch span
        l2_reg_weight (float): L2 regularization weight for the learner
        momentum_time_constant (float): momentum expressed as a time constant
        base_path (str): root folder for the data / pre-trained model
        boSkipTraining (bool): if True, return the pre-trained penultimate layer untouched
        debug_output (bool): if True, plot the graph and save per-epoch models
        tensorboardLogDir (str or None): if set, also log progress/parameters to TensorBoard

    Returns:
        the trained network output node, or the penultimate layer when training is skipped
    '''
    # make sure we use GPU for training (device type 0 == CPU)
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and define mapping from reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source("train", image_height, image_width, num_classes, num_rois,
                                                    base_path, randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # set loss / error functions
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # set the progress printer(s)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    # idiom fix: identity comparison with None, not `!= None`
    if tensorboardLogDir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object.
    # Learning rates are given per image; scale down to per-sample (per ROI).
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            if sample_count % 100 == 1:
                print("Training in progress: epoch {} of {}, sample count {} of {}".format(epoch, max_epochs,
                                                                                           sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboardLogDir is not None:
            for parameter in frcn_output.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", np.mean(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/std", np.std(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum", np.sum(np.abs(parameter.value)), epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))

    return frcn_output
def create_NDArrayView_from_NumPy(nd, dev=None): if not dev: dev = use_default_device() return cntk_py.NDArrayView(nd, dev, False)
prediction_algorithm_name = 'Easy' print("СКРИПТ ПОТОЧНОГО ПРОГНОЗИРОВАНИЯ " + prediction_algorithm_name + " ЗАПУЩЕН...") import random random.seed() session = random.getrandbits(16) print("session = " + (str)(session)) from cntk.device import try_set_default_device, gpu import cntk.device as C print(C.all_devices()) print(C.try_set_default_device(C.gpu(0))) print(C.use_default_device()) import time import sys import argparse import numpy from datetime import datetime from keras.models import load_model import json #print(sys.platform) def createParser(): parser = argparse.ArgumentParser() #parser.add_argument('--json_file_path',type=str,default='D:\Anton\Desktop\MAIN\Экспертная система\Экспертная система\Алгоритмы прогнозирования\LSTM 1\h.json') parser.add_argument('--json_file_path', type=str) return parser
def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
    '''
    Convert to :class:`Value` with ``dtype``. If the samples in
    ``batch`` have different sequence lengths, pad them to max sequence length
    and create a mask.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): variable node for which
         the ``batch`` is meant
        batch: batch input for `var`. It can be a pure Python structure (list
         of lists, ...), a combination of lists of NumPy arrays or SciPy
         sparse CSR matrices. Alternatively, it can also be the output of
         :func:`one_hot`.
        seq_starts (list of bool or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the previous one (`False`)
        dtype (NumPy dtype or None): target element type; inferred from
         ``var`` when None
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        :class:`Value`: converted batch that can be passed to the core API
    '''
    # Pass-through: already a core Value object.
    if isinstance(batch, cntk_py.Value):
        return batch

    if isinstance(batch, list):
        if len(batch) == 0:
            raise ValueError('batch is empty')

    # We need to figure out whether the data has a sequence axis. Note that
    # it is not enough to check whether the variable's dynamic axes include the
    # sequence axis, because the sequence axis might be omitted in the data if
    # it is not needed (CNTK core would then take care of this).
    batch_has_seq = _has_seq_dim(var, batch)

    is_dense = _is_dense(batch)

    if batch_has_seq or seq_starts:
        # Per-sequence lengths: nested lists measure by len(), arrays by
        # their first (sequence) dimension.
        if isinstance(batch[0], list):
            seq_lens = [len(seq) for seq in batch]
        else:
            seq_lens = [seq.shape[0] for seq in batch]

        max_seq_len = max(seq_lens)

        # If the input is a list of lists of dense values, all of the same
        # length, we convert it into a NumPy array.
        if is_dense and len(set(seq_lens)) == 1:
            batch_has_seq = False
            batch = np.asarray(batch, dtype=var.dtype)

    if dtype is None:
        dtype = get_data_type(var)

    if device is None:
        device = use_default_device()

    # batch is now either a dense input that requires a mask, or it is sparse
    if batch_has_seq or seq_starts:
        # NOTE(review): device was already defaulted above, so the
        # `device or use_default_device()` fallback here is redundant.
        mask = cntk_py.NDMask((len(batch), max_seq_len),
                              device or use_default_device())
        for idx, seq_len in enumerate(seq_lens):
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            # The first parameter is provided as a vector of ints, and thus
            # won't be automatically reversed to col-major, because of which we
            # provide it as such.
            # The second parameter is specifying the rectangle of the mask that
            # is invalid. As C++ is taking an NDShape, and we reverse the shape
            # in the SWIG layer, we provide it here as row-major.
            mask.invalidate_section((seq_len, idx),
                                    (1, cntk_py.InferredDimension))
    else:
        mask = None

    # Dense path: pad ragged sequences, normalize to ndarray, wrap in a Value.
    if is_dense:
        if batch_has_seq:
            batch = _pad_dense_to_max_len(var, batch, max_seq_len)
        if not isinstance(batch, np.ndarray):
            batch = np.asarray(batch)
        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    # One big ndarray (non-sequence) path: coerce integer dtypes, reject
    # anything that is not float32/float64.
    if isinstance(batch, np.ndarray):
        if np.issubdtype(batch.dtype, int):
            batch = batch.astype(var.dtype)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')

        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    # There are three possibilities of providing sparse batches:
    # 1. batch is given as one big sparse array
    batch_is_sparse = sparse.issparse(batch)
    if batch_is_sparse:
        sparse_tmp = batch
    else:
        # 2. batch is given as a list of sparse arrays, each of which is a
        #    full sequence
        batch_has_sparse_sequences = batch_is_sparse or sparse.issparse(batch[0])
        if batch_has_sparse_sequences:
            sparse_tmp = batch[0]
        else:
            # 3. batch is given as a list of lists containing the sparse
            #    sequence elements
            batch_has_sparse_elements = batch_has_sparse_sequences or \
                    sparse.issparse(batch[0][0])
            if batch_has_sparse_elements:
                sparse_tmp = batch[0][0]

    # NOTE(review): if none of the three cases matched, sparse_tmp is unbound
    # and this line raises NameError instead of the ValueError below — verify
    # whether any caller can reach this with non-dense, non-sparse input.
    if not sparse.isspmatrix_csr(sparse_tmp):
        raise ValueError("only CSR is supported as of now. Please "
                         "convert your data using 'batch.tocsr()'")

    # The `or` chain relies on short-circuiting: the later flags are only
    # defined when the earlier ones are False.
    if batch_is_sparse or batch_has_sparse_sequences or \
            batch_has_sparse_elements:

        batch_shape = batch.shape if hasattr(batch, 'shape') else (len(batch),)
        sample_shape = var.shape

        if not batch_is_sparse:
            # batch is not one big sparse matrix, but a list of them (or a list
            # of lists of them), so we have to create one. Two possibilities:
            # 1. Batch has sequence axis: only 1d sparse vectors are allowed.
            # 2. Otherwise, 1d or 2d sparse tensors are allowed
            if batch_has_seq:
                shape = batch[0][0].shape
                if not (len(shape)==1 or len(shape)==2 and shape[0]==1):
                    raise ValueError('only 1D sparse vectors are supported in '
                            ' sequence data, you gave shape %s'%str(shape))
                # Pad and stack the sparse vectors.
                if batch_has_seq:
                    batch = _pad_sparse_seq_to_max_len(batch, max_seq_len)
                batch_shape += (max_seq_len,)
                # We are actually 1D. If rank==2, then the first dim is 1.
                sample_shape = sample_shape[-1]
            else:
                sample_shape = batch[0][0].shape
                if len(sample_shape) not in [1,2]:
                    raise ValueError('only 1D or 2D sparse vectors are supported')
            # Vertically stack sequences/samples
            batch = sparse.vstack(batch, format='csr')

            batch_shape += _as_tuple(sample_shape)

        # Build the NDArrayView directly from the CSR buffers (data, indptr,
        # indices) without densifying.
        ndav = cntk_py.NDArrayView(batch_shape, batch.data.astype(var.dtype),
                                   batch.indptr, batch.indices, device, False)

        return Value(data=ndav, mask=mask)

    else:
        raise ValueError('batch input not understood')
def create(var, batch, seq_starts=None, device=None, read_only=False):
    '''
    Creates a :class:`Value` object.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): input variable into which
         ``batch`` is passed
        batch: batch input. It can be
         * a single NumPy array denoting the full minibatch
         * a list of NumPy arrays or SciPy sparse CSR matrices
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the sequence in the same slot of the previous
         minibatch (`False`)
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on
        read_only (bool, default False): whether the data is read only

    Returns:
        :class:`Value` object.
    '''
    if isinstance(batch, np.ndarray):
        # The outermost axis has to be Python list. If the user passes a
        # full minibatch as one NumPy array, we have to convert it.
        if batch.dtype == object:
            raise ValueError('dtype object is not supported. If this is a batch '
                    'of sequences, you need to pass them as a pure-Python list '
                    'of NumPy arrays')

        # FIXME if not seq_starts: directly pass it to Value constructor
        batch = list(batch)

    if not isinstance(batch, list):
        raise ValueError('batch has to be a list of NumPy arrays or '
                'SciPy CSR matrices')

    list_of_ndavs = []

    # NDArrayViews are all created on CPU. The Value object later then will
    # move it to the requested device.
    cpu_dev = cpu()
    for sample in batch:
        # Pure-Python samples (lists) are first converted to an ndarray of
        # the variable's dtype.
        if isinstance(sample, list):
            sample = np.asarray(sample, dtype=var.dtype)
            if sample.dtype != var.dtype:
                raise ValueError('could not convert sample data to '
                                 'NumPy array')

        if not (isinstance(sample, np.ndarray) or sparse.issparse(sample)):
            raise ValueError('sample type "%s" is not supported. Please '
                    'provide the data as a Python list of NumPy arrays '
                    'or Scipy CSR matrices.' % type(sample))

        # Normalize the element type: integers are cast to the variable's
        # dtype; only float32/float64 are accepted otherwise (and are still
        # cast to var.dtype in the else branch).
        if np.issubdtype(sample.dtype, int):
            sample = sample.astype(var.dtype)
        elif sample.dtype not in (np.float32, np.float64):
            raise ValueError('only integer, float32 and float64 are supported, '
                    'you gave %s' % sample.dtype)
        else:
            sample = sample.astype(var.dtype)

        if isinstance(sample, np.ndarray):
            # Dense sample: the underlying buffer is handed over directly, so
            # it must be C-contiguous.
            if not _is_c_contiguous(sample):
                raise ValueError('supplied data is not C contiguous; use '
                        'np.ascontiguousarray (slow) or rearrange your data/computation')
            ndav = _create_NDArrayView_from_NumPy(sample, cpu_dev)

        elif sparse.issparse(sample):
            if not sparse.isspmatrix_csr(sample):
                raise ValueError("only CSR is supported as of now. Please "
                        "convert your data using 'tocsr()'")

            # Sparse sample: build the view from the raw CSR buffers.
            ndav = cntk_py.NDArrayView(sample.shape, sample.data,
                    sample.indptr, sample.indices, cpu_dev, False)

        list_of_ndavs.append(ndav)

    return cntk_py.Value_create(
            _as_tuple(var.shape), list_of_ndavs,
            seq_starts or [],
            device or use_default_device(),
            read_only)