Example #1
0
    def prepare_batch(
        data: Data,
        start: Optional[int] = None,
        end: Optional[int] = None,
        tuple_sizes: Optional[Dict[Text, int]] = None,
    ) -> Tuple[Optional[np.ndarray], ...]:
        """Cut a single batch out of the model data.

        Every feature array found in ``data`` is sliced to ``[start:end]``; a
        bound left as ``None`` keeps that side of the slice open. Sparse slices
        are handed to ``RasaDataGenerator._scipy_matrix_to_values`` and each of
        the returned items is added to the batch; dense slices are handed to
        ``RasaDataGenerator._pad_dense_data`` and added as one item.

        Args:
            data: Nested mapping ``key -> sub key -> feature arrays`` to slice.
            start: First index of the batch, or ``None`` to start at the
              beginning.
            end: Index one past the last element of the batch, or ``None`` to
              run to the end.
            tuple_sizes: Number of ``None`` placeholders to emit per top-level
              key when a feature is absent. If not given (or empty), a single
              ``None`` is emitted for every absent feature.

        Returns:
            The batch entries, in iteration order of ``data``, as a tuple.
        """
        batch = []
        # ``v[start:end]`` builds exactly this slice object, whichever bounds
        # are None, so a single slice covers all four start/end combinations.
        window = slice(start, end)

        for attribute, sub_features in data.items():
            for feature_arrays in sub_features.values():
                if feature_arrays:
                    for array in feature_arrays:
                        chunk = array[window]
                        if chunk.is_sparse:
                            batch.extend(
                                RasaDataGenerator._scipy_matrix_to_values(chunk)
                            )
                        else:
                            batch.append(
                                RasaDataGenerator._pad_dense_data(chunk)
                            )
                elif tuple_sizes:
                    # Feature is absent: pad with the configured number of
                    # placeholders for this top-level key.
                    batch.extend([None] * tuple_sizes[attribute])
                else:
                    batch.append(None)

        return tuple(batch)
Example #2
0
    def _preprocess_data(self, data: Data) -> np.ndarray:
        """Turn data into a single np.ndarray for sklearn training.

        Each attribute's features are converted with
        ``self._get_features_for_attribute`` and the results are concatenated
        along the last axis; dialogue history features are flattened.

        Args:
            data: Training data containing all the features, keyed by
              attribute.

        Returns:
            Training data of shape
            [num_dialogs x (max_history * all_features)], where all_features
            is the sum of the number of features of intent, action_name,
            entities, forms, slots.

        Raises:
            Exception: If ``data`` contains text features (``TEXT`` or
              ``ACTION_TEXT``), which this policy cannot handle.
        """
        if TEXT in data or ACTION_TEXT in data:
            # ``__name__`` lives on the class, not on instances; using
            # ``self.__name__`` would raise AttributeError and hide this
            # message from the user.
            raise Exception(
                f"{type(self).__name__} cannot be applied to text data. "
                f"Try to use TEDPolicy instead. "
            )

        # Concatenation order follows the iteration order of ``data``; wrapping
        # the intermediate dict in an OrderedDict did not change that order.
        features_per_attribute = [
            self._get_features_for_attribute(features)
            for features in data.values()
        ]
        return np.concatenate(features_per_attribute, axis=-1)