예제 #1
0
    def __init__(self, data_streams, max_samples=INFINITELY_REPEAT):
        '''
        Set up an in-memory minibatch source from a dictionary of named data streams.

        Args:
            data_streams (dict): maps each stream name to either ``data`` or a
             ``(data, type)`` tuple. ``data`` must be a numpy array, a
             ``scipy.sparse.csr_matrix``, a ``Value``, or a list of those.
             ``type`` is passed through ``Variable._Type._sanitize`` and must
             come out as a ``Variable._Type``.
            max_samples: stored as ``self._max_samples``; defaults to
             ``INFINITELY_REPEAT``.

        Raises:
            ValueError: if no stream is given, if a type is not a CNTK variable
             type, if a specified type's shape does not match the data's sample
             shape, or if the data is empty.
            TypeError: if the data is of an unsupported kind, or if the streams
             do not all have the same number of samples.
        '''
        from cntk import Variable
        from scipy import sparse
        if not data_streams:
            raise ValueError('at least one stream must be specified, in the form name=data or name=(data, type)')
        self._data = dict()         # [name] -> numpy.array or scipy.sparse.csr_matrix
        self._types = dict()        # [name] -> Variable._Type
        self._is_sequence = dict()  # [name] -> bool
        self._vars = dict()         # [name] -> Variable
        self._max_samples = max_samples

        # get the data and types from the input, and form streams array
        self._num_samples = -1  # total number of samples --must be the same for all args
        for name, arg in data_streams.items():
            if isinstance(arg, tuple):
                value, var_type = arg
                var_type = Variable._Type._sanitize(var_type)
                if not isinstance(var_type, Variable._Type):
                    raise ValueError('type must be a CNTK variable type, e.g. Tensor[13]')
                dynamic_axes = getattr(var_type, 'dynamic_axes', None)
                # force a real bool: a missing/empty dynamic_axes must not leak
                # None or an empty container into self._is_sequence
                is_sequence = bool(dynamic_axes) and len(dynamic_axes) > 1
            else:
                value = arg
                is_sequence = False  # data without type cannot have a dynamic axis
                var_type = Variable._Type(is_sparse=isinstance(value, sparse.csr_matrix)) # shape implanted below
            if isinstance(value, list):
                # an empty list has no first item to inspect; report it the same
                # way as any other empty data instead of failing with IndexError
                if not value:
                    raise ValueError('data is empty')
                first_item = value[0]
            else:
                first_item = value
            if not isinstance(first_item, (np.ndarray, sparse.csr_matrix, Value)):
                raise TypeError('data must be a numpy.array or scipy.sparse.csr_matrix, or a list of those')
            # drop the leading axis; for sequence data that is taken from the first item
            sample_shape = value[0].shape[1:] if is_sequence else value.shape[1:]
            if not var_type.shape_is_known:
                var_type = var_type.updated_with(shape=sample_shape) # implant the shape
            elif var_type.shape != sample_shape:
                # the exception used to be constructed but never raised, so
                # mismatching shapes were silently accepted
                raise ValueError("specified type's shape does not match the data's shape")
            try:
                dtype = value.dtype # numpy array and Value
            except AttributeError:
                dtype = value[0].dtype # for lists
            try:
                var_type.dtype
            except Exception:
                # NOTE(review): presumably an unset dtype surfaces as AttributeError;
                # kept broad because Variable._Type is not visible here
                var_type = var_type.updated_with(dtype=dtype) # implant the dtype
            num_samples = MinibatchSourceFromData._get_len(value)
            if self._num_samples == -1:
                # first stream seen: it fixes the sample count all others must match
                if num_samples == 0:
                    raise ValueError('data is empty')
                self._num_samples = num_samples
            elif self._num_samples != num_samples:
                raise TypeError('all data items must have the same first dimension')
            self._data[name] = value
            self._types[name] = var_type
            self._is_sequence[name] = is_sequence

        self._cursor = 0            # current position
        self._total_num_samples = 0 # total count; once the limit is reached, we stop returning data

        super(MinibatchSourceFromData, self).__init__()
예제 #2
0
파일: __init__.py 프로젝트: ashionlrq/CNTK
    def __init__(self, data_streams, max_samples=INFINITELY_REPEAT):
        '''
        Set up an in-memory minibatch source from a dictionary of named data streams.

        Args:
            data_streams (dict): maps each stream name to either ``data`` or a
             ``(data, type)`` tuple. ``data`` must be a numpy array, a
             ``scipy.sparse.csr_matrix``, a ``Value``, or a list of those.
             ``type`` is passed through ``Variable._Type._sanitize`` and must
             come out as a ``Variable._Type``.
            max_samples: stored as ``self._max_samples``; defaults to
             ``INFINITELY_REPEAT``.

        Raises:
            ValueError: if no stream is given, if a type is not a CNTK variable
             type, if a specified type's shape does not match the data's sample
             shape, or if the data is empty.
            TypeError: if the data is of an unsupported kind, or if the streams
             do not all have the same number of samples.
        '''
        from cntk import Variable
        from scipy import sparse
        if not data_streams:
            raise ValueError('at least one stream must be specified, in the form name=data or name=(data, type)')
        self._data = dict()         # [name] -> numpy.array or scipy.sparse.csr_matrix
        self._types = dict()        # [name] -> Variable._Type
        self._is_sequence = dict()  # [name] -> bool
        self._vars = dict()         # [name] -> Variable
        self._max_samples = max_samples

        # get the data and types from the input, and form streams array
        self._num_samples = -1  # total number of samples --must be the same for all args
        for name, arg in data_streams.items():
            if isinstance(arg, tuple):
                value, var_type = arg
                var_type = Variable._Type._sanitize(var_type)
                if not isinstance(var_type, Variable._Type):
                    raise ValueError('type must be a CNTK variable type, e.g. Tensor[13]')
                dynamic_axes = getattr(var_type, 'dynamic_axes', None)
                # force a real bool: a missing/empty dynamic_axes must not leak
                # None or an empty container into self._is_sequence
                is_sequence = bool(dynamic_axes) and len(dynamic_axes) > 1
            else:
                value = arg
                is_sequence = False  # data without type cannot have a dynamic axis
                var_type = Variable._Type(is_sparse=isinstance(value, sparse.csr_matrix)) # shape implanted below
            if isinstance(value, list):
                # an empty list has no first item to inspect; report it the same
                # way as any other empty data instead of failing with IndexError
                if not value:
                    raise ValueError('data is empty')
                first_item = value[0]
            else:
                first_item = value
            if not isinstance(first_item, (np.ndarray, sparse.csr_matrix, Value)):
                raise TypeError('data must be a numpy.array or scipy.sparse.csr_matrix, or a list of those')
            # drop the leading axis; for sequence data that is taken from the first item
            sample_shape = value[0].shape[1:] if is_sequence else value.shape[1:]
            if not var_type.shape_is_known:
                var_type = var_type.updated_with(shape=sample_shape) # implant the shape
            elif var_type.shape != sample_shape:
                # the exception used to be constructed but never raised, so
                # mismatching shapes were silently accepted
                raise ValueError("specified type's shape does not match the data's shape")
            try:
                dtype = value.dtype # numpy array and Value
            except AttributeError:
                dtype = value[0].dtype # for lists
            try:
                var_type.dtype
            except Exception:
                # NOTE(review): presumably an unset dtype surfaces as AttributeError;
                # kept broad because Variable._Type is not visible here
                var_type = var_type.updated_with(dtype=dtype) # implant the dtype
            num_samples = MinibatchSourceFromData._get_len(value)
            if self._num_samples == -1:
                # first stream seen: it fixes the sample count all others must match
                if num_samples == 0:
                    raise ValueError('data is empty')
                self._num_samples = num_samples
            elif self._num_samples != num_samples:
                raise TypeError('all data items must have the same first dimension')
            self._data[name] = value
            self._types[name] = var_type
            self._is_sequence[name] = is_sequence

        self._cursor = 0            # current position
        self._total_num_samples = 0 # total count; once the limit is reached, we stop returning data

        super(MinibatchSourceFromData, self).__init__()
예제 #3
0
def variable(shape, data_type=None, needs_gradient=False, is_sparse=False,
             dynamic_axes=None, name=''):
    '''
    It creates an input node. The graph requires a separate reader that will be
    fed to this input.

    Args:
        shape (tuple or int): the shape of the input tensor
        data_type: np.float32 or np.float64
        needs_gradient (bool): whether to back-propagate to it or not
        is_sparse (bool): whether the variable is sparse
        dynamic_axes (list): a list of dynamic axes (e.g., batch axis, time axis).
         If omitted or `None`, a fresh list
         ``[Axis.default_dynamic_axis(), Axis.default_batch_axis()]`` is used.
        name (str): the name of the node in the network

    Returns:
        the ``Variable`` (from ``.variables``) constructed from the arguments
    '''
    from .variables import Variable

    # Build the default per call: a list literal in the signature would be
    # created once at definition time and shared by every caller.
    if dynamic_axes is None:
        dynamic_axes = [Axis.default_dynamic_axis(), Axis.default_batch_axis()]

    # TODO dynamic axis for numpy arrays
    # TODO sparse for numpy arrays
    return Variable(shape, data_type, needs_gradient, is_sparse, dynamic_axes, name)