コード例 #1
0
ファイル: __init__.py プロジェクト: ashionlrq/CNTK
    def next_minibatch(self, minibatch_size_in_samples,
                       input_map=None, device=None, num_data_partitions=None,
                       partition_index=None):
        '''
        Read the next minibatch of data for all input streams.

        The minibatch size is given in #samples and/or #sequences for the
        primary input stream; 0 means unspecified. If both are given, the
        smaller of the two is used. An empty map is returned once the
        MinibatchSource has no more data.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
              for the next minibatch. Must be > 0.
              **Important:**
              Click :cntkwiki:`here <BrainScript-minibatchSize-and-Python-minibatch_size_in_samples-in-CNTK>` for a full description of this parameter.
            input_map (dict): mapping of :class:`~cntk.variables.Variable`
              to :class:`StreamInformation` used to convert the returned
              data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
              DeviceDescriptor; the default device is used when `None`.
            num_data_partitions: used for distributed training; number of
              partitions the source should split the data into.
            partition_index (`int`, defaults to `None`): used for
              distributed training; which partition to read data from.

        Returns:
            cntk.io.MinibatchData:
             A mapping of :class:`StreamInformation` to
             :class:`MinibatchData` if `input_map` was not specified,
             otherwise a mapping of :class:`~cntk.variables.Variable` to
             class:`MinibatchData`. An empty dict when the maximum number
             of epochs/samples is exhausted.
        '''
        # Release the data copied for the previous minibatch before
        # fetching a new one.
        if self._last_mb_data is not None:
            self._last_mb_data.clear()

        device = use_default_device() if device is None else device
        num_data_partitions = (1 if num_data_partitions is None
                               else num_data_partitions)
        partition_index = 0 if partition_index is None else partition_index

        mb = super(MinibatchSource, self).get_next_minibatch(
            0, minibatch_size_in_samples, num_data_partitions,
            partition_index, device)

        # No data left, or caller wants the raw stream mapping.
        if not mb or not input_map:
            return mb

        # We copy minibatch data here; keep a reference so it can be
        # cleaned up the next time next_minibatch is called.
        self._last_mb_data = {var: mb[info]
                              for var, info in input_map.items()}
        return self._last_mb_data
コード例 #2
0
    def __init__(self,
                 shape=None,
                 init=None,
                 dtype=None,
                 device=None,
                 name=''):
        '''
        Construct a Parameter either from concrete initial values
        (numpy array, list, or scalar) or from an initializer object.
        '''
        if not device:
            device = use_default_device()

        if dtype is None:
            # Infer the dtype: a scalar init without a shape means the
            # empty shape; a numpy init supplies its own dtype; otherwise
            # default to float32.
            if np.isscalar(init) and not shape:
                shape = ()
            dtype = init.dtype if isinstance(init, np.ndarray) else np.float32
        elif isinstance(init, np.ndarray) and dtype != init.dtype:
            # Recast a numpy initializer to the explicitly requested dtype.
            init = np.array(init, dtype=dtype)

        if init is None:
            init = 0

        if isinstance(init, (np.ndarray, list, float, int)):
            # Concrete values: wrap them in an NDArrayView.
            ndav = sanitize_value(shape, init, dtype, device)
            super(Parameter, self).__init__(ndav, name)
        else:
            # An initializer object: let the backend draw the values.
            super(Parameter, self).__init__(sanitize_shape(shape),
                                            sanitize_dtype_cntk(dtype),
                                            init, device, name)
コード例 #3
0
    def __init__(self,
                 value=None,
                 shape=None,
                 dtype=None,
                 device=None,
                 name=''):
        '''
        Construct a Constant from a scalar, a numpy array, or other
        value data.
        '''
        if not device:
            device = use_default_device()

        # A scalar or numpy value with no explicit shape gets the empty
        # shape.
        is_scalar_or_array = np.isscalar(value) or isinstance(value, np.ndarray)
        if is_scalar_or_array and not shape:
            shape = ()

        if dtype is None:
            # Infer dtype from a numpy value, defaulting to float32.
            dtype = value.dtype if isinstance(value, np.ndarray) else np.float32
        elif isinstance(value, np.ndarray) and dtype != value.dtype:
            # Recast the numpy value to the explicitly requested dtype.
            value = np.array(value, dtype=dtype)

        if np.isscalar(value):
            # Scalars go straight to the backend constructor.
            super(Constant, self).__init__(sanitize_shape(shape),
                                           sanitize_dtype_cntk(dtype), value,
                                           device, name)
        else:
            # Everything else is wrapped in an NDArrayView first.
            ndav = sanitize_value(shape, value, dtype, device)
            super(Constant, self).__init__(ndav, name)
コード例 #4
0
ファイル: __init__.py プロジェクト: delpart/CNTK
    def next_minibatch(self, minibatch_size_in_samples,
                       input_map=None, device=None, num_data_partitions=None,
                       partition_index=None):
        '''
        Read the next minibatch of data for all input streams.

        The minibatch size is given in #samples and/or #sequences for the
        primary input stream; 0 means unspecified. If both are given, the
        smaller of the two is used. An empty map is returned once the
        MinibatchSource has no more data.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
              for the next minibatch. Must be > 0.
              **Important:**
              Click :cntkwiki:`here <BrainScript-minibatchSize-and-Python-minibatch_size_in_samples-in-CNTK>` for a full description of this parameter.
            input_map (dict): mapping of :class:`~cntk.variables.Variable`
              to :class:`StreamInformation` used to convert the returned
              data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
              DeviceDescriptor; the default device is used when `None`.
            num_data_partitions: used for distributed training; number of
              partitions the source should split the data into.
            partition_index (`int`, defaults to `None`): used for
              distributed training; which partition to read data from.

        Returns:
            cntk.io.MinibatchData:
             A mapping of :class:`StreamInformation` to
             :class:`MinibatchData` if `input_map` was not specified,
             otherwise a mapping of :class:`~cntk.variables.Variable` to
             class:`MinibatchData`. An empty dict when the maximum number
             of epochs/samples is exhausted.
        '''
        # Release the data copied for the previous minibatch before
        # fetching a new one.
        if self._last_mb_data is not None:
            self._last_mb_data.clear()

        device = use_default_device() if device is None else device
        num_data_partitions = (1 if num_data_partitions is None
                               else num_data_partitions)
        partition_index = 0 if partition_index is None else partition_index

        mb = super(MinibatchSource, self).get_next_minibatch(
            0, minibatch_size_in_samples, num_data_partitions,
            partition_index, device)

        # No data left, or caller wants the raw stream mapping.
        if not mb or not input_map:
            return mb

        # We copy minibatch data here; keep a reference so it can be
        # cleaned up the next time next_minibatch is called.
        self._last_mb_data = {var: mb[info]
                              for var, info in input_map.items()}
        return self._last_mb_data
コード例 #5
0
    def get_next_minibatch(self,
                           minibatch_size_in_samples,
                           minibatch_size_in_sequences=None,
                           device=None):
        '''
        Read a minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (`int`): number of samples to
             retrieve for the next minibatch. Must be > 0.
            minibatch_size_in_sequences (`int`, defaults to `None`): number
             of sequences to retrieve for the next minibatch. Must be > 0.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
             DeviceDescriptor; the default device is used when `None`.

        Returns:
            `:class:MinibatchData`
        '''
        if device is None:
            device = use_default_device()

        parent = super(MinibatchSource, self)
        # The backend overload is selected by argument count.
        if minibatch_size_in_sequences is None:
            return parent.get_next_minibatch(minibatch_size_in_samples,
                                             device)
        return parent.get_next_minibatch(minibatch_size_in_samples,
                                         minibatch_size_in_sequences,
                                         device)
コード例 #6
0
ファイル: __init__.py プロジェクト: nagyistge/Microsoft.CNTK
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Args:
        batch (list (of lists, if sequence) of index data): batch input data
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed to
        the forward or eval function.

    Raises:
        ValueError: if ``dtype`` is neither ``np.float32``, ``np.float64``
         nor ``None``.
    '''
    if device is None:
        device = use_default_device()

    if dtype in (np.float32, None):
        return cntk_py.Value.create_one_hot_float(num_classes, batch, device,
                                                  False)
    if dtype == np.float64:
        return cntk_py.Value.create_one_hot_double(num_classes, batch, device,
                                                   False)
    # Previously an unsupported dtype fell through and raised an opaque
    # UnboundLocalError on `value`; fail with an explicit message instead.
    raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
コード例 #7
0
ファイル: __init__.py プロジェクト: hahatt/CNTK
    def get_next_minibatch(self, minibatch_size_in_samples,
            minibatch_size_in_sequences=None, device=None):
        '''
        Read a minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (`int`): number of samples to
             retrieve for the next minibatch. Must be > 0.
            minibatch_size_in_sequences (`int`, defaults to `None`): number
             of sequences to retrieve for the next minibatch. Must be > 0.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
             DeviceDescriptor; the default device is used when `None`.

        Returns:
            `:class:MinibatchData`
        '''
        if device is None:
            device = use_default_device()

        parent = super(MinibatchSource, self)
        # The backend overload is selected by argument count.
        if minibatch_size_in_sequences is None:
            return parent.get_next_minibatch(minibatch_size_in_samples,
                                             device)
        return parent.get_next_minibatch(minibatch_size_in_samples,
                                         minibatch_size_in_sequences,
                                         device)
コード例 #8
0
ファイル: __init__.py プロジェクト: Microsoft/CNTK
def _create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
    # Allocate a dense NDArrayView of the given shape and dtype on
    # `device` (the default device when None).
    shape = sanitize_shape(shape)
    if device is None:
        device = use_default_device()
    # FIXME only dense supported so far
    return cntk_py.NDArrayView(data_type, cntk_py.StorageFormat_Dense,
                               shape, device)
コード例 #9
0
def _create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
    # Allocate a dense NDArrayView of the given shape and dtype on
    # `device` (the default device when None).
    shape = sanitize_shape(shape)
    if device is None:
        device = use_default_device()
    # FIXME only dense supported so far
    return cntk_py.NDArrayView(data_type, cntk_py.StorageFormat_Dense,
                               shape, device)
コード例 #10
0
    def next_minibatch(self,
                       minibatch_size_in_samples=None,
                       minibatch_size_in_sequences=None,
                       input_map=None,
                       device=None):
        '''
        Read the next minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
             for the next minibatch. Must be > 0.
            minibatch_size_in_sequences (int, defaults to `None`): number
             of sequences to retrieve for the next minibatch. Must be > 0.
            input_map (dict): mapping of :class:`~cntk.ops.variabls.Variable`
             to :class:`StreamInformation` used to convert the returned
             data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
             DeviceDescriptor; the default device is used when `None`.

        Returns:
            A mapping of :class:`StramInformation` to :class:`MinibatchData`
            if ``input_map`` was not specified. Otherwise, the returned
            value will be a mapping of :class:`~cntk.ops.variabls.Variable`
            to class:`MinibatchData`.
        '''
        if device is None:
            device = use_default_device()

        # At least one size must be provided.
        if minibatch_size_in_samples is None and \
                minibatch_size_in_sequences is None:
            raise ValueError(
                'you have to specify at least one of '
                'minibatch_size_in_samples or minibatch_size_in_sequences')

        parent = super(MinibatchSource, self)
        if minibatch_size_in_sequences is None:
            mb = parent.get_next_minibatch(minibatch_size_in_samples, device)
        else:
            # An unspecified sample count is passed to the backend as 0.
            if minibatch_size_in_samples is None:
                minibatch_size_in_samples = 0
            mb = parent.get_next_minibatch(minibatch_size_in_samples,
                                           minibatch_size_in_sequences,
                                           device)

        if not input_map:
            return mb
        if not mb:
            return None
        return {var: mb[info] for var, info in input_map.items()}
コード例 #11
0
    def next_minibatch(self,
                       minibatch_size_in_samples,
                       input_map=None,
                       device=None,
                       num_data_partitions=None,
                       partition_index=None):
        '''
        Read the next minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
              for the next minibatch. Must be > 0.
              **Important:**
              Click `here <https://github.com/Microsoft/CNTK/wiki/BrainScript-minibatchSize-and-Python-minibatch_size_in_samples-in-CNTK>`__ for a full description of this parameter. 
            input_map (dict): mapping of :class:`~cntk.variables.Variable`
              to :class:`~cntk.cntk_py.StreamInformation` used to convert
              the returned data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
              DeviceDescriptor; the default device is used when `None`.
            num_data_partitions: used for distributed training; number of
              partitions the source should split the data into.
            partition_index (`int`, defaults to `None`): used for
              distributed training; which partition to read data from.

        Returns:
            cntk.io.MinibatchData:
            A mapping of :class:`~cntk.cntk_py.StreamInformation` to
            :class:`MinibatchData` if `input_map` was not specified.
            Otherwise, a mapping of :class:`~cntk.variables.Variable`
            to class:`MinibatchData`.
        '''
        device = use_default_device() if device is None else device
        num_data_partitions = (1 if num_data_partitions is None
                               else num_data_partitions)
        partition_index = 0 if partition_index is None else partition_index

        mb = super(MinibatchSource, self).get_next_minibatch(
            0, minibatch_size_in_samples, num_data_partitions,
            partition_index, device)

        # No data left, or caller wants the raw stream mapping.
        if not mb or not input_map:
            return mb

        return {var: mb[info] for var, info in input_map.items()}
コード例 #12
0
ファイル: __init__.py プロジェクト: Soukiy/CNTK
    def next_minibatch(self, minibatch_size_in_samples=None,
            minibatch_size_in_sequences=None, input_map=None,
            device=None):
        '''
        Read the next minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
             for the next minibatch. Must be > 0.
            minibatch_size_in_sequences (int, defaults to `None`): number
             of sequences to retrieve for the next minibatch. Must be > 0.
            input_map (dict): mapping of :class:`~cntk.ops.variabls.Variable`
             to :class:`StreamInformation` used to convert the returned
             data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
             DeviceDescriptor; the default device is used when `None`.

        Returns:
            A mapping of :class:`StramInformation` to :class:`MinibatchData`
            if ``input_map`` was not specified. Otherwise, the returned
            value will be a mapping of :class:`~cntk.ops.variabls.Variable`
            to class:`MinibatchData`.
        '''
        if device is None:
            device = use_default_device()

        # At least one size must be provided.
        if minibatch_size_in_samples is None and \
                minibatch_size_in_sequences is None:
            raise ValueError('you have to specify at least one of '
                    'minibatch_size_in_samples or minibatch_size_in_sequences')

        parent = super(MinibatchSource, self)
        if minibatch_size_in_sequences is None:
            mb = parent.get_next_minibatch(minibatch_size_in_samples, device)
        else:
            # An unspecified sample count is passed to the backend as 0.
            if minibatch_size_in_samples is None:
                minibatch_size_in_samples = 0
            mb = parent.get_next_minibatch(minibatch_size_in_samples,
                                           minibatch_size_in_sequences,
                                           device)

        if not input_map:
            return mb
        if not mb:
            return None
        return {var: mb[info] for var, info in input_map.items()}
コード例 #13
0
    def next_minibatch(self,
                       minibatch_size_in_samples,
                       input_map=None,
                       device=None,
                       num_data_partitions=None,
                       partition_index=None):
        '''
        Read the next minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
             for the next minibatch. Must be > 0.
            input_map (dict): mapping of :class:`~cntk.ops.variabls.Variable`
             to :class:`StreamInformation` used to convert the returned
             data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
             DeviceDescriptor; the default device is used when `None`.
            num_data_partitions: used for distributed training; number of
             partitions the source should split the data into.
            partition_index: used for distributed training; which partition
             to read data from.

        Returns:
            a mapping of :class:`StreamInformation` to :class:`MinibatchData`
            if ``input_map`` was not specified. Otherwise, the returned
            value will be a mapping of :class:`~cntk.ops.variables.Variable`
            to class:`MinibatchData`.
        '''
        device = use_default_device() if device is None else device
        num_data_partitions = (1 if num_data_partitions is None
                               else num_data_partitions)
        partition_index = 0 if partition_index is None else partition_index

        mb = super(MinibatchSource, self).get_next_minibatch(
            0, minibatch_size_in_samples, num_data_partitions,
            partition_index, device)

        if not input_map:
            return mb
        # With an input_map, an exhausted source yields an empty dict.
        if not mb:
            return {}
        return {var: mb[info] for var, info in input_map.items()}
コード例 #14
0
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (list of lists of integers): batch input data of indices
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed to
        the forward or eval function.

    Raises:
        ValueError: if ``batch`` is not a list of lists of integers, or if
         ``dtype`` is neither ``np.float32``, ``np.float64`` nor ``None``.
    '''
    if device is None:
        device = use_default_device()

    if isinstance(batch, np.ndarray):
        batch = batch.tolist()

    # Validate the nested-list shape; catch only the errors indexing can
    # raise instead of a bare `except:` that would swallow everything.
    try:
        data_type = type(batch[0][0])
    except (TypeError, IndexError, KeyError):
        raise ValueError('input must be a list of list of integers')

    if data_type != int:
        raise ValueError('supplied data to one_hot() must be of type integer'
                         ' and not "%s" since it is index data.' % data_type)

    if dtype in (np.float32, None):
        return cntk_py.Value.create_one_hot_float(num_classes, batch, device,
                                                  False)
    if dtype == np.float64:
        return cntk_py.Value.create_one_hot_double(num_classes, batch, device,
                                                   False)
    # Previously an unsupported dtype fell through and raised an opaque
    # UnboundLocalError on `value`; fail with an explicit message instead.
    raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
コード例 #15
0
ファイル: __init__.py プロジェクト: BorisJineman/CNTK
    def next_minibatch(self, minibatch_size_in_samples,
            input_map=None, device=None, num_data_partitions=None, partition_index=None):
        '''
        Read the next minibatch containing data for all input streams.

        The minibatch size is specified in #samples and/or #sequences for
        the primary input stream; 0 means unspecified. If both are given,
        the smaller of the two is used. An empty map is returned when the
        MinibatchSource has no more data to return.

        Args:
            minibatch_size_in_samples (int): number of samples to retrieve
              for the next minibatch. Must be > 0.
              **Important:**
              Click `here <https://github.com/Microsoft/CNTK/wiki/BrainScript-epochSize-and-Python-epoch_size-in-CNTK>`_ for a full description of this parameter. 
            input_map (dict): mapping of :class:`~cntk.ops.variables.Variable`
              to :class:`~cntk.cntk_py.StreamInformation` used to convert
              the returned data.
            device (`DeviceDescriptor`, defaults to `None`): CNTK
              DeviceDescriptor; the default device is used when `None`.
            num_data_partitions: used for distributed training; number of
              partitions the source should split the data into.
            partition_index (`int`, defaults to `None`): used for
              distributed training; which partition to read data from.

        Returns:
            cntk.io.MinibatchData:
            A mapping of :class:`~cntk.cntk_py.StreamInformation` to
            :class:`MinibatchData` if `input_map` was not specified.
            Otherwise, a mapping of :class:`~cntk.ops.variables.Variable`
            to class:`MinibatchData`.
        '''
        device = use_default_device() if device is None else device
        num_data_partitions = (1 if num_data_partitions is None
                               else num_data_partitions)
        partition_index = 0 if partition_index is None else partition_index

        mb = super(MinibatchSource, self).get_next_minibatch(
            0, minibatch_size_in_samples, num_data_partitions,
            partition_index, device)

        if not input_map:
            return mb
        # With an input_map, an exhausted source yields an empty dict.
        if not mb:
            return {}
        return {var: mb[info] for var, info in input_map.items()}
コード例 #16
0
ファイル: persist.py プロジェクト: jplu/CNTK
def load_model(filename, device=None):
    '''
    Load a network previously saved with `:func:save_model` from
    ``filename``.

    Args:
        filename (str): filename to load the model from
        device (:class:`~cntk.DeviceDescriptor`, default is the default device):
         instance of DeviceDescriptor

    Returns:
        root node
    '''
    # Fall back to the default device for any falsy `device`.
    device = device or use_default_device()
    return cntk_py.Function.load_model(filename, device)
コード例 #17
0
ファイル: persist.py プロジェクト: nagyistge/Microsoft.CNTK
def load_model(filename, device=None):
    '''
    Load a network previously saved with `:func:save_model` from
    ``filename``.

    Args:
        filename (str): filename to load the model from
        device (:class:`~cntk.DeviceDescriptor`, default is the default device):
         instance of DeviceDescriptor

    Returns:
        root node
    '''
    # Fall back to the default device for any falsy `device`.
    device = device or use_default_device()
    return cntk_py.Function.load_model(filename, device)
コード例 #18
0
ファイル: __init__.py プロジェクト: FDecaYed/CNTK
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`~cntk.core.Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (list of lists of integers): batch input data of indices
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed to
        the forward or eval function.

    Raises:
        ValueError: if ``batch`` is not a list of lists of integers, or if
         ``dtype`` is neither ``np.float32``, ``np.float64`` nor ``None``.
    '''
    if device is None:
        device = use_default_device()

    if isinstance(batch, np.ndarray):
        batch = batch.tolist()

    # Validate the nested-list shape; catch only the errors indexing can
    # raise instead of a bare `except:` that would swallow everything.
    try:
        data_type = type(batch[0][0])
    except (TypeError, IndexError, KeyError):
        raise ValueError('input must be a list of list of integers')

    if data_type != int:
        raise ValueError('supplied data to one_hot() must be of type integer'
                ' and not "%s" since it is index data.'%data_type)

    if dtype in (np.float32, None):
        return cntk_py.Value.create_one_hot_float(num_classes, batch, device,
                                                  False)
    if dtype == np.float64:
        return cntk_py.Value.create_one_hot_double(num_classes, batch, device,
                                                   False)
    # Previously an unsupported dtype fell through and raised an opaque
    # UnboundLocalError on `value`; fail with an explicit message instead.
    raise ValueError('dtype must be np.float32 or np.float64, not %s' % dtype)
コード例 #19
0
ファイル: __init__.py プロジェクト: vamsirajendra/CNTK
    def __init__(self, shape=None, dtype=None, data=None, mask=None, device=None):
        '''
        Construct a Value either from a shape/dtype pair (allocating a new
        NDArrayView) or from existing data (a numpy array or an already
        constructed NDArrayView).
        '''
        if device is None:
            device = use_default_device()

        ndav = None
        if shape and dtype:
            ndav = _create_NDArrayView(shape, dtype, device)
        elif data is not None:
            # `data is not None` instead of truthiness: `elif data:` raised
            # "truth value of an array ... is ambiguous" for multi-element
            # numpy arrays.
            if isinstance(data, np.ndarray):
                ndav = _create_NDArrayView_from_NumPy(data, device)
            else:
                ndav = data

        if ndav is None:
            # Previously this fell through to an UnboundLocalError on `ndav`;
            # fail with an explicit message instead.
            raise ValueError('either shape and dtype, or data, must be '
                             'provided')

        if mask:
            super(Value, self).__init__(ndav, mask)
        else:
            super(Value, self).__init__(ndav)
コード例 #20
0
ファイル: __init__.py プロジェクト: jplu/CNTK
    def __init__(self, shape=None, dtype=None, data=None, mask=None, device=None):
        '''
        Construct a Value either from a shape/dtype pair (allocating a new
        NDArrayView) or from existing data (a numpy array or an already
        constructed NDArrayView).
        '''
        if device is None:
            device = use_default_device()

        ndav = None
        if shape and dtype:
            ndav = _create_NDArrayView(shape, dtype, device)
        elif data is not None:
            # `data is not None` instead of truthiness: `elif data:` raised
            # "truth value of an array ... is ambiguous" for multi-element
            # numpy arrays.
            if isinstance(data, np.ndarray):
                ndav = _create_NDArrayView_from_NumPy(data, device)
            else:
                ndav = data

        if ndav is None:
            # Previously this fell through to an UnboundLocalError on `ndav`;
            # fail with an explicit message instead.
            raise ValueError('either shape and dtype, or data, must be '
                             'provided')

        if mask:
            super(Value, self).__init__(ndav, mask)
        else:
            super(Value, self).__init__(ndav)
コード例 #21
0
ファイル: persist.py プロジェクト: zgsxwsdxg/CNTK
def load_model(data_type, filename, device=None):
    '''
    Load a network previously stored with `:func:save_model`.

    Args:
        data_type ('float' or 'double', or NumPy type): data type of the operation
        filename (`str`): filename to load the model from
        device (:class:`cntk.device.DeviceDescriptor`, default to default device): instance of DeviceDescriptor
            NOTE(review): the device is resolved here but never forwarded to
            ``load_legacy_model`` -- confirm whether that is intentional.

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    cntk_type = sanitize_dtype_cntk(data_type)
    device = device or use_default_device()
    return cntk_py.load_legacy_model(cntk_type, filename)
コード例 #22
0
ファイル: persist.py プロジェクト: hahatt/CNTK
def load_model(data_type, filename, device=None):
    '''
    Load the network saved in ``filename`` by `:func:save_model`.

    Args:
        data_type ('float' or 'double', or NumPy type): data type of the operation
        filename (`str`): filename to load the model from
        device (:class:`cntk.DeviceDescriptor`, default to default device): instance of DeviceDescriptor
            NOTE(review): resolved but not passed on to ``load_legacy_model``
            -- verify against the cntk_py API.

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    resolved_type = sanitize_dtype_cntk(data_type)
    if not device:
        device = use_default_device()
    return cntk_py.load_legacy_model(resolved_type, filename)
コード例 #23
0
ファイル: __init__.py プロジェクト: Microsoft/CNTK
    def __init__(self, shape=None, dtype=None, batch=None, seq_starts=None, device=None):
        '''
        Build a Value either as an uninitialized view of the requested
        geometry (``shape`` + ``dtype``) or wrapping existing ``batch`` data;
        ``seq_starts`` is forwarded to the base constructor when given.
        '''
        dev = use_default_device() if device is None else device

        if shape and dtype:
            # FIXME is this needed?
            ndav = _create_NDArrayView(shape, dtype, dev)
        elif batch:
            ndav = (_create_NDArrayView_from_NumPy(batch, dev)
                    if isinstance(batch, np.ndarray) else batch)

        ctor_args = (ndav, seq_starts) if seq_starts else (ndav,)
        super(Value, self).__init__(*ctor_args)
コード例 #24
0
    def __init__(self, shape=None, dtype=None, batch=None, seq_starts=None, device=None):
        '''Wrap ``batch`` (or allocate ``shape``/``dtype``) as a Value,
        optionally carrying sequence-begin markers.'''
        if device is None:
            device = use_default_device()

        if shape and dtype:
            # FIXME is this needed?
            ndav = _create_NDArrayView(shape, dtype, device)
        elif batch:
            batch_is_numpy = isinstance(batch, np.ndarray)
            ndav = _create_NDArrayView_from_NumPy(batch, device) if batch_is_numpy else batch

        if seq_starts:
            super(Value, self).__init__(ndav, seq_starts)
        else:
            super(Value, self).__init__(ndav)
コード例 #25
0
def load_model(filename, dtype=np.float32, device=None):
    '''
    Load a network previously written out by `:func:save_model`.

    Args:
        filename (str): filename to load the model from
        dtype ('float', 'double', or NumPy type, default np.float32): data
         type of the operation
        device (:class:`~cntk.DeviceDescriptor`, default is the default device):
         instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    cntk_dtype = sanitize_dtype_cntk(dtype)
    target_device = device if device else use_default_device()
    return cntk_py.Function.load_model(cntk_dtype, filename, target_device)
コード例 #26
0
ファイル: persist.py プロジェクト: shadrack4292/CNTK
def load_model(filename, dtype=np.float32, device=None):
    '''
    Load the network stored in ``filename`` by `:func:save_model`.

    Args:
        filename (`str`): filename to load the model from
        dtype ('float', 'double', or NumPy type, default ``np.float32``): data
         type of the operation
        device (:class:`cntk.DeviceDescriptor`, default is the default device):
         instance of DeviceDescriptor

    Returns:
        root node
    '''
    from cntk.utils import sanitize_dtype_cntk
    resolved_dtype = sanitize_dtype_cntk(dtype)
    if not device:
        device = use_default_device()
    return cntk_py.Function.load_model(resolved_dtype, filename, device)
コード例 #27
0
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors. Additionally, a SciPy CSR matrix can be obtained
    by calling :meth:`~cntk.utils.Value.to_csr`.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (NumPy array or list (of lists, if sequence) of index data): batch input data
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed to
        the forward or eval function.

    Raises:
        ValueError: if ``dtype`` is not ``np.float32``, ``np.float64`` or None.
    '''
    if device is None:
        device = use_default_device()

    # The C++ factory expects nested Python lists of indices.
    if isinstance(batch, np.ndarray):
        batch = batch.tolist()

    if dtype in [np.float32, None]:
        return cntk_py.Value.create_one_hot_float(num_classes, batch, device,
                                                  False)
    if dtype == np.float64:
        return cntk_py.Value.create_one_hot_double(num_classes, batch, device,
                                                   False)
    # Previously an unsupported dtype fell through, leaving ``value`` unbound
    # and raising an obscure UnboundLocalError; fail explicitly instead.
    raise ValueError('one_hot() only supports dtype np.float32 or np.float64, '
                     'got "%s"' % dtype)
コード例 #28
0
ファイル: __init__.py プロジェクト: Microsoft/CNTK
def one_hot(batch, num_classes, dtype=None, device=None):
    '''
    Converts ``batch`` into a :class:`Value` object of ``dtype``
    such that the integer data in ``batch`` is interpreted as the indices
    representing one-hot vectors. Additionally, a SciPy CSR matrix can be obtained
    by calling :meth:`~cntk.utils.Value.to_csr`.

    Example:
        >>> num_classes = 6
        >>> sparse_indices = [[1,5],[4]]
        >>> i0 = C.input_variable(shape=num_classes, is_sparse=True)
        >>> z = C.times(i0, np.eye(num_classes))
        >>> value = C.one_hot(sparse_indices, num_classes)
        >>> z.eval({i0: value})
        [array([[ 0.,  1.,  0.,  0.,  0.,  0.],
               [ 0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32), array([[ 0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]

    Args:
        batch (NumPy array or list (of lists, if sequence) of index data): batch input data
        num_classes (int): number of classes
        dtype (`np.float32`, `np.float64`, default None): data type
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        ``batch`` converted into a :class:`~Value` object that can be passed to
        the forward or eval function.

    Raises:
        ValueError: if ``dtype`` is not ``np.float32``, ``np.float64`` or None.
    '''
    if device is None:
        device = use_default_device()

    # The C++ factory expects nested Python lists of indices.
    if isinstance(batch, np.ndarray):
        batch = batch.tolist()

    if dtype in [np.float32, None]:
        return cntk_py.Value.create_one_hot_float(num_classes, batch, device, False)
    if dtype == np.float64:
        return cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
    # Previously an unsupported dtype fell through, leaving ``value`` unbound
    # and raising an obscure UnboundLocalError; fail explicitly instead.
    raise ValueError('one_hot() only supports dtype np.float32 or np.float64, '
                     'got "%s"' % dtype)
コード例 #29
0
ファイル: __init__.py プロジェクト: FDecaYed/CNTK
def sanitize_batch(var, batch, seq_starts=None, device=None):
    '''
    Convert ``batch`` into a :class:`~cntk.core.Value`.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): input variable into which
         ``batch`` is passed
        batch: batch input for `var`. It can be
         * a single NumPy array denoting the full minibatch
         * a list of NumPy arrays or SciPy sparse CSR matrices each representing a sequence
         * a :class:`~cntk.core.Value` object (e.g. returned by :func:`one_hot`)
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans one for each sequence in the batch that tell whether a
         sequence is a new sequence (`True`) or a continuation of the sequence
         in the same slot of the previous minibatch (`False`)
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        :class:`~cntk.core.Value`: converted batch that can be passed to the core API
    '''
    # A ready-made Value passes straight through, but begin-markers cannot be
    # honored in that case.
    if isinstance(batch, cntk_py.Value):
        if seq_starts is None:
            return batch
        raise ValueError('for directly passed Value objects sequence '
                'starts cannot be used yet.')

    has_seq_axis = len(var.dynamic_axes) > 1
    if seq_starts and not has_seq_axis:
        raise ValueError('you specified sequence begin markers, but your '
                'input_variable does not contain a sequence axis.')

    if device is None:
        device = use_default_device()

    from .. import Value
    return Value.create(var, batch, seq_starts, device)
コード例 #30
0
def sanitize_batch(var, batch, seq_starts=None, device=None):
    '''
    Convert ``batch`` to a :class:`~cntk.core.Value` suitable for the core API.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): input variable into which
         ``batch`` is passed
        batch: batch input for `var`. It can be
         * a single NumPy array denoting the full minibatch
         * a list of NumPy arrays or SciPy sparse CSR matrices each representing a sequence
         * a :class:`~cntk.core.Value` object (e.g. returned by :func:`one_hot`)
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans one for each sequence in the batch that tell whether a
         sequence is a new sequence (`True`) or a continuation of the sequence
         in the same slot of the previous minibatch (`False`)
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        :class:`~cntk.core.Value`: converted batch that can be passed to the core API
    '''
    if isinstance(batch, cntk_py.Value):
        # Nothing to convert; begin-markers are not supported on pre-built
        # Value objects.
        if seq_starts is not None:
            raise ValueError('for directly passed Value objects sequence '
                    'starts cannot be used yet.')
        return batch

    if seq_starts and len(var.dynamic_axes) <= 1:
        raise ValueError('you specified sequence begin markers, but your '
                'input_variable does not contain a sequence axis.')

    target_device = device if device is not None else use_default_device()

    from .. import Value
    return Value.create(var, batch, seq_starts, target_device)
コード例 #31
0
ファイル: __init__.py プロジェクト: Microsoft/CNTK
    def create(var, batch, seq_starts=None, device=None, read_only=False):
        '''
        Creates a :class:`Value` object.

        Args:
            var (:class:`~cntk.ops.variables.Variable`): input variable into which
             ``batch`` is passed
            batch: batch input. It can be
             * a single NumPy array denoting the full minibatch
             * a list of NumPy arrays or SciPy sparse CSR matrices
            seq_starts (list of `bool`s or None): if None, every sequence is
             treated as a new sequence. Otherwise, it is interpreted as a list of
             Booleans that tell whether a sequence is a new sequence (`True`) or a
             continuation of the sequence in the same slot of the previous
             minibatch (`False`)
            device (:class:`~cntk.device.DeviceDescriptor`, default None): device
             this value should be put on
            read_only (bool, default False): whether the data is read only

        Raises:
            ValueError: if ``batch`` is not a list/NumPy array, contains
             object-dtype data, non-contiguous arrays, unsupported dtypes,
             or non-CSR sparse matrices.

        Returns:
            :class:`Value` object.
        '''
        if isinstance(batch, np.ndarray):
            # The outermost axis has to be Python list. If the user passes a
            # full minibatch as one NumPy array, we have to convert it.
            if batch.dtype == object:
                raise ValueError('dtype object is not supported. If this is a batch '
                        'of sequences, you need to pass them as a pure-Python list '
                        'of NumPy arrays')

            # FIXME if not seq_starts: directly pass it to Value constructor

            batch = list(batch)

        if not isinstance(batch, list):
            raise ValueError('batch has to be a list of NumPy arrays or '
                    'SciPy CSR matrices')

        # One NDArrayView per sequence in the batch.
        list_of_ndavs = []

        # NDArrayViews are all created on CPU. The Value object later then will
        # move it to the requested device.
        cpu_dev = cpu()
        for sample in batch:
            # A nested Python list becomes a NumPy array with the variable's
            # dtype; a failed conversion leaves a mismatching dtype behind.
            if isinstance(sample, list):
                sample = np.asarray(sample, dtype=var.dtype)
                if sample.dtype != var.dtype:
                    raise ValueError('could not convert sample data to '
                            'NumPy array')

            if not (isinstance(sample, np.ndarray) or sparse.issparse(sample)):
                raise ValueError('sample type "%s" is not supported. Please '
                        'provide the data as a Python list of NumPy arrays '
                        'or Scipy CSR matrices.'%type(sample))

            # Integer index data is silently promoted to the variable's float
            # dtype; anything that is neither integer nor float32/64 is
            # rejected.
            # NOTE(review): np.issubdtype(..., int) relies on deprecated NumPy
            # alias behavior; np.integer is the modern spelling -- confirm
            # before changing.
            if np.issubdtype(sample.dtype, int):
                sample = sample.astype(var.dtype)
            elif sample.dtype not in (np.float32, np.float64):
                raise ValueError('only integer, float32 and float64 are supported, '
                        'you gave %s'%sample.dtype)

            if isinstance(sample, np.ndarray):
                # The C++ side reads the raw buffer, so the layout must be
                # C-contiguous.
                if not _is_c_contiguous(sample):
                    raise ValueError('supplied data is not C contiguous; use '
                            'np.ascontiguousarray (slow) or rearrange your data/computation')
                ndav = _create_NDArrayView_from_NumPy(sample, cpu_dev)

            elif sparse.issparse(sample):
                if not sparse.isspmatrix_csr(sample):
                    raise ValueError("only CSR is supported as of now. Please "
                            "convert your data using 'tocsr()'")

                # Build the sparse view directly from the CSR triplet.
                ndav = cntk_py.NDArrayView(sample.shape, sample.data,
                        sample.indptr, sample.indices, cpu_dev, False)

            list_of_ndavs.append(ndav)

        # The Value ctor moves the CPU-side views to the requested device.
        return cntk_py.Value_create(
                _as_tuple(var.shape), list_of_ndavs,
                seq_starts or [],
                device or use_default_device(),
                read_only)
コード例 #32
0
ファイル: __init__.py プロジェクト: Microsoft/CNTK
def _create_NDArrayView_from_NumPy(nd, device=None):
    '''Wrap the NumPy array ``nd`` in a writable NDArrayView on ``device``
    (the process default device when ``device`` is None).'''
    target = use_default_device() if device is None else device
    return cntk_py.NDArrayView(nd, target, False)
コード例 #33
0
def _create_NDArrayView_from_NumPy(nd, device=None):
    '''Create a writable NDArrayView backed by the NumPy array ``nd``.'''
    if device is None:
        device = use_default_device()
    # Final False: the view is not read-only.
    return cntk_py.NDArrayView(nd, device, False)
コード例 #34
0
ファイル: __init__.py プロジェクト: vamsirajendra/CNTK
def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
    '''
    Convert to :class:`Value` with ``dtype``. If the samples in
    ``batch`` have different sequence lengths, pad them to max sequence length
    and create a mask.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): variable node for which
         the ``batch`` is meant
        batch: batch input for `var`. It can be a pure Python structure (list
         of lists, ...), a combination of lists of NumPy arrays or SciPy
         sparse CSR matrices. Alternatively, it can also be the output of
         :func:`one_hot`.
        seq_starts (list of `bool`s or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the sequence in the same slot of the previous
         minibatch (`False`)
        dtype (NumPy dtype or None): element type of the resulting Value;
         defaults to the data type inferred from ``var``
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        :class:`Value`: converted batch that can be passed to the core API
    '''
    # Already-converted input passes straight through.
    if isinstance(batch, cntk_py.Value):
        return batch

    if isinstance(batch, list):
        if len(batch) == 0:
            raise ValueError('batch is empty')

    if isinstance(batch, np.ndarray) and batch.dtype == object:
        raise ValueError('dtype object is not supported. If this is a batch '
                'of sequences, you need to pass them as a pure-Python list '
                'of NumPy arrays')

    # We need to figure out whether the data has a sequence axis. Note that
    # it is not enough to check whether the variable's dynamic axes include the
    # sequence axis, because the sequence axis might be omitted in the data if
    # it is not needed (CNTK core would then take care of this).
    batch_has_seq = _has_seq_dim(var, batch)

    is_dense = _is_dense(batch)

    if batch_has_seq or seq_starts:
        # Per-sequence lengths: nested Python lists vs. arrays whose first
        # axis is the sequence axis.
        if isinstance(batch[0], list):
            seq_lens = [len(seq) for seq in batch]
        else:
            seq_lens = [seq.shape[0] for seq in batch]

        max_seq_len = max(seq_lens)

        # If the input is a list of lists of dense values, all of the same
        # length, we convert it into a NumPy array.
        if is_dense and len(set(seq_lens)) == 1:
            batch_has_seq = False
            batch = np.asarray(batch, dtype=var.dtype)

    if dtype is None:
        dtype = get_data_type(var)

    if device is None:
        device = use_default_device()

    # batch is now either a dense input that requires a mask, or it is sparse
    if batch_has_seq or seq_starts:
        # NOTE(review): ``device`` was already defaulted above, so the
        # ``device or use_default_device()`` below is redundant.
        mask = cntk_py.NDMask((len(batch), max_seq_len),
                device or use_default_device())
        for idx, seq_len in enumerate(seq_lens):
            # Missing seq_starts means every slot starts a fresh sequence.
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            # The first parameter is provided as a vector of ints, and thus
            # won't be automatically reversed to col-major, because of which we
            # provide it as such.
            # The second parameter is specifying the rectangle of the mask that
            # is invalid. As C++ is taking an NDShape, and we reverse the shape
            # in the SWIG layer, we provide it here as row-major.
            mask.invalidate_section((seq_len, idx),
                                    (1, cntk_py.InferredDimension))
    else:
        mask = None

    if is_dense:
        if batch_has_seq:
            batch = _pad_dense_to_max_len(var, batch, max_seq_len)
        if not isinstance(batch, np.ndarray):
            batch = np.asarray(batch)
        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    if isinstance(batch, np.ndarray):
        # NOTE(review): np.issubdtype(..., int) depends on a deprecated NumPy
        # alias; np.integer is the modern spelling -- confirm before changing.
        if np.issubdtype(batch.dtype, int):
            batch = batch.astype(var.dtype)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')

        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    # There are three possibilities of providing sparse batches:
    # 1. batch is given as one big sparse array
    batch_is_sparse = sparse.issparse(batch)
    if batch_is_sparse:
        sparse_tmp = batch
    else:
        # 2. batch is given as a list of sparse arrays, each of which is a full
        #    sequence
        batch_has_sparse_sequences = batch_is_sparse or sparse.issparse(batch[0])
        if batch_has_sparse_sequences:
            sparse_tmp = batch[0]
        else:
            # 3. batch is given as a list of lists containing the sparse sequence
            #    elements
            batch_has_sparse_elements = batch_has_sparse_sequences or \
                    sparse.issparse(batch[0][0])
            if batch_has_sparse_elements:
                sparse_tmp = batch[0][0]

    # NOTE(review): if none of the three sparse layouts matched, ``sparse_tmp``
    # is never assigned and the next line raises NameError instead of the
    # intended ValueError at the bottom -- verify.
    if not sparse.isspmatrix_csr(sparse_tmp):
        raise ValueError("only CSR is supported as of now. Please "
                "convert your data using 'batch.tocsr()'")

    # The short-circuiting ``or`` keeps the possibly-unbound flags from being
    # evaluated once an earlier test has already succeeded.
    if batch_is_sparse or batch_has_sparse_sequences or \
            batch_has_sparse_elements:

        batch_shape = batch.shape if hasattr(batch, 'shape') else (len(batch),)
        sample_shape = var.shape

        if not batch_is_sparse:
            # batch is not one big sparse matrix, but a list of them (or a list
            # of lists of them), so we have to create one. Two  possibilities:
            # 1. Batch has sequence axis: only 1d sparse vectors are allowed.
            # 2. Ohterwise, 1d or 2d sparse tensors are allowed
            if batch_has_seq:
                shape = batch[0][0].shape
                if not (len(shape)==1 or len(shape)==2 and shape[0]==1):
                    raise ValueError('only 1D sparse vectors are supported in '
                            ' sequence data, you gave shape %s'%str(shape))
                # Pad and stack the sparse vectors.
                if batch_has_seq:
                    batch = _pad_sparse_seq_to_max_len(batch, max_seq_len)
                batch_shape += (max_seq_len,)
                # We are actually 1D. If rank==2, then the first dim is 1.
                sample_shape = sample_shape[-1]
            else:
                sample_shape = batch[0][0].shape
                if len(sample_shape) not in [1,2]:
                    raise ValueError('only 1D or 2D sparse vectors are supported')

            # Vertically stack sequences/samples
            batch = sparse.vstack(batch, format='csr')

            batch_shape += _as_tuple(sample_shape)

        # Build the sparse NDArrayView directly from the CSR triplet.
        ndav = cntk_py.NDArrayView(batch_shape, batch.data.astype(var.dtype),
                batch.indptr, batch.indices, device, False)

        return Value(data=ndav, mask=mask)

    else:
        raise ValueError('batch input not understood')
コード例 #35
0
def sanitize_batch(var, batch, seq_starts=None, data_type=None, device=None):
    '''
    Convert to :class:`cntk.cntk_py.Value` with ``data_type``. If the samples in ``batch`` have
    different sequence lengths, pad them to max sequence length and create a
    mask.

    Args:
        var (:class:`cntk.ops.variables.Variable`): variable node for which the ``batch`` is
         meant
        batch (`list` of NumPy arrays): input
        seq_starts (`list` of `bool` or `None`): if `None`, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the previous one (`False`)
        data_type (NumPy dtype or None): element type to coerce ``batch`` to
         when it is not already a NumPy array; inferred from ``var`` if None
        device: device descriptor the mask/view should be created on; defaults
         to the process default device

    Returns:
        :class:`cntk.cntk_py.Value`: converted batch
    '''
    from ..cntk_py import Value

    # Already converted -- nothing to do.
    if isinstance(batch, Value):
        return batch

    use_mask = False

    if isinstance(batch, np.ndarray):
        # np.integer covers all integer widths. (The previous check compared
        # against the deprecated-and-removed np.int alias, which also matched
        # only the platform-default integer width.)
        if np.issubdtype(batch.dtype, np.integer):
            batch = batch.astype(np.float32)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')
    elif isinstance(batch, list):
        # A mask is only needed when the variable carries a sequence axis
        # in addition to the batch axis.
        if is_tensor_list(batch):
            use_mask = len(var.dynamic_axes) > 1

    if device is None:
        device = use_default_device()

    if not use_mask and seq_starts is not None:
        raise ValueError('specification of individual sequence begins does not'
                ' make sense when not using the sequence axis')

    # Use the mask, if we have additional dynamic axes besides the batch axis
    if use_mask:
        seq_lens = [len(seq) for seq in batch]

        try:
            num_seq = len(batch)
        except TypeError:
            raise ValueError('expected an object of type Value or a NumPy ' +
                             'array and not "%s"' % type(batch))

        from cntk.cntk_py import NDMask
        mask = NDMask((max(seq_lens), num_seq), device)
        for idx, seq_len in enumerate(seq_lens):
            # Missing seq_starts means every slot starts a fresh sequence.
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            mask.invalidate_section((seq_len, idx),
                                    (cntk_py.InferredDimension, 1))

        # Then we pad the batch to rectangular shape
        if isinstance(batch, list):
            if len(batch) == 0:
                raise ValueError('batch is empty')

            batch = pad_to_dense(batch)

    # If it still is not an NumPy array, try brute force...
    if not isinstance(batch, np.ndarray):
        if data_type is None:
            data_type = get_data_type(var)
        batch = np.asarray(batch, dtype=data_type)

    # Maybe a NumPy dtype was given, but with lower accuracy than float32;
    # promote integer batches to float32. (A previous revision referenced an
    # undefined variable ``cntk_shape`` here, which raised a NameError for
    # every integer batch; that broken check has been removed.)
    if np.issubdtype(batch.dtype, np.integer):
        batch = batch.astype(np.float32)

    ndav = create_NDArrayView_from_NumPy(batch, device)

    if use_mask:
        value = Value(ndav, mask)
    else:
        value = Value(ndav)

    return value
コード例 #36
0
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size, max_epochs, cntk_lr_per_image, l2_reg_weight,
                         momentum_time_constant, base_path, boSkipTraining = False, debug_output=False, tensorboardLogDir = None):
    '''
    Build the Fast R-CNN network and, unless ``boSkipTraining`` is set,
    train it on the minibatch source created from ``base_path``.

    Args:
        image_height, image_width (int): input image dimensions
        num_classes (int): number of object classes (including background)
        num_rois (int): number of regions of interest per image
        mb_size (int): minibatch size in samples
        max_epochs (int): number of training epochs
        cntk_lr_per_image (list of float): per-image learning rates (one entry
         per schedule step); divided by ``num_rois`` to get per-sample rates
        l2_reg_weight (float): L2 regularization weight for the learner
        momentum_time_constant: momentum expressed as a time constant
        base_path (str): root folder for the minibatch source data
        boSkipTraining (bool, default False): if True, return the pre-trained
         penultimate layer without any refinement
        debug_output (bool, default False): write graph plot and per-epoch
         model checkpoints
        tensorboardLogDir (str or None): if given, also log progress and
         parameter statistics to TensorBoard

    Returns:
        the trained network output node (or the pre-trained penultimate
        layer when ``boSkipTraining`` is True)
    '''
    #make sure we use GPU for training
    # Device type 0 denotes CPU here -- training proceeds either way, this
    # only warns.
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input   = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and define mapping from reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source("train", image_height, image_width, num_classes, num_rois,
                                                    base_path, randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # set loss / error functions
    # axis=1: loss/error are computed per ROI.
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # set the progress printer(s)
    # NOTE(review): '!= None' works but 'is not None' is the idiomatic test.
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboardLogDir != None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object
    # The configured rates are per image; scale down to per sample (per ROI).
    lr_per_sample = [f/float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            # NOTE(review): '% 100 == 1' prints on counts 1, 101, ... only when
            # sample_count happens to hit those exact values -- confirm the
            # intended logging cadence.
            if sample_count % 100 == 1:
                print("Training in progress: epoch {} of {}, sample count {} of {}".format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboardLogDir != None:
            for parameter in frcn_output.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", np.mean(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/std", np.std(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum", np.sum(np.abs(parameter.value)), epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))
    return frcn_output
コード例 #37
0
ファイル: __init__.py プロジェクト: zgsxwsdxg/CNTK
def create_NDArrayView_from_NumPy(nd, dev=None):
    '''Wrap the NumPy array ``nd`` in a writable NDArrayView on ``dev``
    (the default device when ``dev`` is not given).'''
    target_dev = dev if dev else use_default_device()
    return cntk_py.NDArrayView(nd, target_dev, False)
コード例 #38
0
# Script preamble: announce startup, seed a random session id, pin CNTK to
# GPU 0, and pull in the remaining dependencies.
prediction_algorithm_name = 'Easy'
# Startup banner (Russian: "STREAMING FORECASTING SCRIPT ... STARTED...").
print("СКРИПТ ПОТОЧНОГО ПРОГНОЗИРОВАНИЯ " + prediction_algorithm_name +
      " ЗАПУЩЕН...")

import random
random.seed()
# 16-bit random id used to tag this run's output.
session = random.getrandbits(16)
print("session = " + (str)(session))

from cntk.device import try_set_default_device, gpu
import cntk.device as C
# Report available devices and attempt to make GPU 0 the default.
print(C.all_devices())
print(C.try_set_default_device(C.gpu(0)))
print(C.use_default_device())
import time
import sys
import argparse
import numpy
from datetime import datetime
from keras.models import load_model
import json


#print(sys.platform)
def createParser():
    '''Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser accepting a single optional
        ``--json_file_path`` string argument (default None).
    '''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--json_file_path', type=str)
    return arg_parser

コード例 #39
0
ファイル: __init__.py プロジェクト: shadrack4292/CNTK
def sanitize_batch(var, batch, seq_starts=None, data_type=None, device=None):
    '''
    Convert to :class:`cntk.cntk_py.Value` with ``data_type``. If the samples in ``batch`` have
    different sequence lengths, pad them to max sequence length and create a
    mask.

    Args:
        var (:class:`cntk.ops.variables.Variable`): variable node for which the ``batch`` is
         meant
        batch (`list` of NumPy arrays): input
        seq_starts (`list` of `bool` or `None`): if `None`, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the previous one (`False`)
        data_type (NumPy dtype or `None`): target dtype; inferred from ``var``
         when `None`
        device (`DeviceDescriptor` or `None`): device to place the data on;
         the default device is used when `None`

    Returns:
        :class:`cntk.cntk_py.Value`: converted batch
    '''
    from ..cntk_py import Value

    # Already a Value -- nothing to convert.
    if isinstance(batch, Value):
        return batch

    use_mask = False

    if isinstance(batch, np.ndarray):
        # np.int was a deprecated alias of the builtin int; use int directly.
        if batch.dtype == int:
            batch = batch.astype(np.float32)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')
    elif isinstance(batch, list):
        if is_tensor_list(batch):
            # A mask is only needed when there is a sequence axis in addition
            # to the batch axis.
            use_mask = len(var.dynamic_axes) > 1

    if device is None:
        device = use_default_device()

    if not use_mask and seq_starts is not None:
        raise ValueError('specification of individual sequence begins does not'
                ' make sense when not using the sequence axis')

    # Use the mask, if we have additional dynamic axes besides the batch axis
    if use_mask:
        seq_lens = [len(seq) for seq in batch]

        try:
            num_seq = len(batch)
        except TypeError:
            raise ValueError('expected an object of type Value or a NumPy ' +
                             'array and not "%s"' % type(batch))

        from cntk.cntk_py import NDMask
        mask = NDMask((max(seq_lens), num_seq), device)
        for idx, seq_len in enumerate(seq_lens):
            # Mark the start of each (new) sequence; everything beyond a
            # sequence's actual length is invalidated so padding is ignored.
            if seq_starts is None:
                mask.mark_sequence_begin((0, idx))
            elif seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            mask.invalidate_section((seq_len, idx),
                                    (cntk_py.InferredDimension, 1))

        # Then we pad the batch to rectangular shape
        if isinstance(batch, list):
            if len(batch) == 0:
                raise ValueError('batch is empty')

            batch = pad_to_dense(batch)

    # If it still is not an NumPy array, try brute force...
    if not isinstance(batch, np.ndarray):
        if data_type is None:
            data_type = get_data_type(var)
        batch = np.asarray(batch, dtype=data_type)

    # Maybe a NumPy dtype was given, but with lower accuracy than float32, then
    # convert it to float32
    if np.issubdtype(batch.dtype, int):
        batch = batch.astype(np.float32)

    # BUGFIX: the original referenced an undefined name `cntk_shape` here
    # (NameError whenever an integer batch reached this branch). The intent
    # is to reject scalar (0-d) input, so check the batch's own shape, and
    # do it for every dtype rather than only the integer path.
    if len(batch.shape) == 0:
        raise ValueError('values should be an array of input samples')

    ndav = create_NDArrayView_from_NumPy(batch, device)

    if use_mask:
        value = Value(ndav, mask)
    else:
        value = Value(ndav)

    return value
コード例 #40
0
ファイル: __init__.py プロジェクト: jplu/CNTK
def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
    '''
    Convert to :class:`Value` with ``dtype``. If the samples in
    ``batch`` have different sequence lengths, pad them to max sequence length
    and create a mask.

    Args:
        var (:class:`~cntk.ops.variables.Variable`): variable node for which
         the ``batch`` is meant
        batch: batch input for `var`. It can be a pure Python structure (list
         of lists, ...), a combination of lists of NumPy arrays or SciPy
         sparse CSR matrices. Alternatively, it can also be the output of
         :func:`one_hot`.
        seq_starts (list of bool or None): if None, every sequence is
         treated as a new sequence. Otherwise, it is interpreted as a list of
         Booleans that tell whether a sequence is a new sequence (`True`) or a
         continuation of the previous one (`False`)
        dtype (NumPy dtype or None): target dtype; inferred from ``var`` when
         None
        device (:class:`~cntk.device.DeviceDescriptor`, default None): device
         this value should be put on

    Returns:
        :class:`Value`: converted batch that can be passed to the core API
    '''
    if isinstance(batch, cntk_py.Value):
        return batch

    if isinstance(batch, list):
        if len(batch) == 0:
            raise ValueError('batch is empty')

    # We need to figure out whether the data has a sequence axis. Note that
    # it is not enough to check whether the variable's dynamic axes include the
    # sequence axis, because the sequence axis might be omitted in the data if
    # it is not needed (CNTK core would then take care of this).
    batch_has_seq = _has_seq_dim(var, batch)

    is_dense = _is_dense(batch)

    if batch_has_seq or seq_starts:
        if isinstance(batch[0], list):
            seq_lens = [len(seq) for seq in batch]
        else:
            seq_lens = [seq.shape[0] for seq in batch]

        max_seq_len = max(seq_lens)

        # If the input is a list of lists of dense values, all of the same
        # length, we convert it into a NumPy array.
        if is_dense and len(set(seq_lens)) == 1:
            batch_has_seq = False
            batch = np.asarray(batch, dtype=var.dtype)

    if dtype is None:
        dtype = get_data_type(var)

    if device is None:
        device = use_default_device()

    # batch is now either a dense input that requires a mask, or it is sparse
    if batch_has_seq or seq_starts:
        # `device` is already defaulted above, so the original
        # `device or use_default_device()` fallback was redundant.
        mask = cntk_py.NDMask((len(batch), max_seq_len), device)
        for idx, seq_len in enumerate(seq_lens):
            if seq_starts is None or seq_starts[idx]:
                mask.mark_sequence_begin((0, idx))
            # The first parameter is provided as a vector of ints, and thus
            # won't be automatically reversed to col-major, because of which we
            # provide it as such.
            # The second parameter is specifying the rectangle of the mask that
            # is invalid. As C++ is taking an NDShape, and we reverse the shape
            # in the SWIG layer, we provide it here as row-major.
            mask.invalidate_section((seq_len, idx),
                                    (1, cntk_py.InferredDimension))
    else:
        mask = None

    if is_dense:
        if batch_has_seq:
            batch = _pad_dense_to_max_len(var, batch, max_seq_len)
        if not isinstance(batch, np.ndarray):
            batch = np.asarray(batch)
        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    if isinstance(batch, np.ndarray):
        if np.issubdtype(batch.dtype, int):
            batch = batch.astype(var.dtype)
        elif batch.dtype not in (np.float32, np.float64):
            raise ValueError('only float32 and float64 are supported')

        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
        return Value(data=ndav, mask=mask)

    # There are three possibilities of providing sparse batches:
    # 1. batch is given as one big sparse array
    batch_is_sparse = sparse.issparse(batch)
    if batch_is_sparse:
        sparse_tmp = batch
    else:
        # 2. batch is given as a list of sparse arrays, each of which is a full
        #    sequence
        batch_has_sparse_sequences = sparse.issparse(batch[0])
        if batch_has_sparse_sequences:
            sparse_tmp = batch[0]
        else:
            # 3. batch is given as a list of lists containing the sparse
            #    sequence elements
            batch_has_sparse_elements = sparse.issparse(batch[0][0])
            if not batch_has_sparse_elements:
                # BUGFIX: the original left `sparse_tmp` unbound on this path,
                # so non-sparse input crashed with NameError instead of
                # reaching the intended error below.
                raise ValueError('batch input not understood')
            sparse_tmp = batch[0][0]

    if not sparse.isspmatrix_csr(sparse_tmp):
        raise ValueError("only CSR is supported as of now. Please "
                "convert your data using 'batch.tocsr()'")

    # At this point exactly one of the three sparse layouts above applies.
    batch_shape = batch.shape if hasattr(batch, 'shape') else (len(batch),)
    sample_shape = var.shape

    if not batch_is_sparse:
        # batch is not one big sparse matrix, but a list of them (or a list
        # of lists of them), so we have to create one. Two possibilities:
        # 1. Batch has sequence axis: only 1d sparse vectors are allowed.
        # 2. Otherwise, 1d or 2d sparse tensors are allowed
        if batch_has_seq:
            shape = batch[0][0].shape
            if not (len(shape)==1 or len(shape)==2 and shape[0]==1):
                raise ValueError('only 1D sparse vectors are supported in '
                        ' sequence data, you gave shape %s'%str(shape))
            # Pad and stack the sparse vectors.
            batch = _pad_sparse_seq_to_max_len(batch, max_seq_len)
            batch_shape += (max_seq_len,)
            # We are actually 1D. If rank==2, then the first dim is 1.
            sample_shape = sample_shape[-1]
        else:
            sample_shape = batch[0][0].shape
            if len(sample_shape) not in [1,2]:
                raise ValueError('only 1D or 2D sparse vectors are supported')

        # Vertically stack sequences/samples
        batch = sparse.vstack(batch, format='csr')

        batch_shape += _as_tuple(sample_shape)

    ndav = cntk_py.NDArrayView(batch_shape, batch.data.astype(var.dtype),
            batch.indptr, batch.indices, device, False)

    return Value(data=ndav, mask=mask)
コード例 #41
0
    def create(var, batch, seq_starts=None, device=None, read_only=False):
        '''
        Creates a :class:`Value` object.

        Args:
            var (:class:`~cntk.ops.variables.Variable`): input variable into which
             ``batch`` is passed
            batch: batch input. It can be
             * a single NumPy array denoting the full minibatch
             * a list of NumPy arrays or SciPy sparse CSR matrices
            seq_starts (list of `bool`s or None): if None, every sequence is
             treated as a new sequence. Otherwise, it is interpreted as a list of
             Booleans that tell whether a sequence is a new sequence (`True`) or a
             continuation of the sequence in the same slot of the previous
             minibatch (`False`)
            device (:class:`~cntk.device.DeviceDescriptor`, default None): device
             this value should be put on
            read_only (bool, default False): whether the data is read only

        Returns:
            :class:`Value` object.
        '''
        # The outermost axis must be a Python list; a full-minibatch NumPy
        # array is therefore converted element by element.
        if isinstance(batch, np.ndarray):
            if batch.dtype == object:
                raise ValueError(
                    'dtype object is not supported. If this is a batch '
                    'of sequences, you need to pass them as a pure-Python list '
                    'of NumPy arrays')

            # FIXME if not seq_starts: directly pass it to Value constructor

            batch = list(batch)

        if not isinstance(batch, list):
            raise ValueError('batch has to be a list of NumPy arrays or '
                             'SciPy CSR matrices')

        # All NDArrayViews are built on the CPU; the resulting Value object
        # moves the data to the requested device afterwards.
        cpu_device = cpu()
        views = []
        for element in batch:
            # Pure-Python lists are densified to the variable's dtype first.
            if isinstance(element, list):
                element = np.asarray(element, dtype=var.dtype)
                if element.dtype != var.dtype:
                    raise ValueError('could not convert sample data to '
                                     'NumPy array')

            if not (isinstance(element, np.ndarray) or sparse.issparse(element)):
                raise ValueError(
                    'sample type "%s" is not supported. Please '
                    'provide the data as a Python list of NumPy arrays '
                    'or Scipy CSR matrices.' % type(element))

            # Integers are widened to the variable's float dtype; anything
            # that is neither integer nor float32/float64 is rejected.
            if np.issubdtype(element.dtype, int):
                element = element.astype(var.dtype)
            elif element.dtype not in (np.float32, np.float64):
                raise ValueError(
                    'only integer, float32 and float64 are supported, '
                    'you gave %s' % element.dtype)
            else:
                element = element.astype(var.dtype)

            if isinstance(element, np.ndarray):
                if not _is_c_contiguous(element):
                    raise ValueError(
                        'supplied data is not C contiguous; use '
                        'np.ascontiguousarray (slow) or rearrange your data/computation'
                    )
                view = _create_NDArrayView_from_NumPy(element, cpu_device)
            elif sparse.issparse(element):
                if not sparse.isspmatrix_csr(element):
                    raise ValueError("only CSR is supported as of now. Please "
                                     "convert your data using 'tocsr()'")
                view = cntk_py.NDArrayView(element.shape, element.data,
                                           element.indptr, element.indices,
                                           cpu_device, False)

            views.append(view)

        return cntk_py.Value_create(
            _as_tuple(var.shape), views, seq_starts or [],
            device or use_default_device(), read_only)
コード例 #42
0
ファイル: __init__.py プロジェクト: shadrack4292/CNTK
def create_NDArrayView_from_NumPy(nd, dev=None):
    """Wrap the NumPy array ``nd`` in a writable NDArrayView on ``dev``,
    using the default device when none is supplied."""
    return cntk_py.NDArrayView(nd, dev or use_default_device(), False)