Example #1
0
def test_reduce_raises_if_combining_different_origins_or_attributes(
        differ: Text):
    """Checks that reducing features with mismatched attributes or origins raises.

    Depending on `differ`, the two constructed features disagree either in
    their attribute or in their origin, and `Features.reduce` is expected to
    raise a `ValueError` with a matching message in both cases.
    """
    # Build two features that agree in everything except the aspect
    # selected by `differ`.
    fixed_type = FEATURE_TYPE_SENTENCE
    features_list = []
    for idx in range(2):
        matrix = np.full(shape=(1, 1), fill_value=1)
        attribute = f"attr-{idx}" if differ == "attribute" else "fixed-attribute"
        origin = f"origin-{idx}" if differ == "origin" else "fixed-origin"
        features_list.append(
            Features(
                features=matrix,
                attribute=attribute,
                feature_type=fixed_type,
                origin=origin,
            )
        )

    # reduce!
    if differ == "attribute":
        expected_message = "Expected all Features to describe the same attribute"
        origins = ["origin"]
    else:
        expected_message = "Expected 'origin-1' to be the origin of the 0-th"
        origins = ["origin-1"]
    with pytest.raises(ValueError, match=expected_message):
        Features.reduce(features_list, expected_origins=origins)
Example #2
0
def test_reduce(shuffle_mode: Text,
                num_features_per_combination: Tuple[int, int, int, int]):
    """Checks that `Features.reduce` combines and orders features correctly.

    Builds `num_features_per_combination[i]` features for each of the four
    (feature type, sparseness) combinations, shuffles them (either by
    reversing or via a seeded random permutation, depending on
    `shuffle_mode`), and asserts that `Features.reduce` merges each
    combination into one feature and restores the expected order.
    """
    # all combinations - in the expected order
    # (i.e. all sparse before all dense and sequence before sentence)
    all_combinations = [
        (FEATURE_TYPE_SEQUENCE, True),
        (FEATURE_TYPE_SENTENCE, True),
        (FEATURE_TYPE_SEQUENCE, False),
        (FEATURE_TYPE_SENTENCE, False),
    ]

    # multiply accordingly and mess up the order
    chosen_combinations = [
        spec
        for spec, num in zip(all_combinations, num_features_per_combination)
        for _ in range(num)
    ]
    if shuffle_mode == "reversed":
        messed_up_order = reversed(chosen_combinations)
    else:
        # Note: rng.permutation would mess up the types
        rng = np.random.default_rng(23452345)
        permutation = rng.permutation(len(chosen_combinations))
        messed_up_order = [chosen_combinations[idx] for idx in permutation]

    # create features accordingly
    # (`feature_type` instead of `type`, which shadowed the builtin; the
    # enumerate index was unused and has been dropped)
    features_list = []
    for feature_type, is_sparse in messed_up_order:
        first_dim = 1 if feature_type == FEATURE_TYPE_SEQUENCE else 3
        matrix = np.full(shape=(first_dim, 1), fill_value=1)
        if is_sparse:
            matrix = scipy.sparse.coo_matrix(matrix)
        config = dict(
            features=matrix,
            attribute="fixed-attribute",  # must be the same
            feature_type=feature_type,
            origin="origin-does-matter-here",  # must be the same
        )
        features_list.append(Features(**config))

    # reduce!
    reduced_list = Features.reduce(features_list)
    assert len(reduced_list) == sum(num > 0
                                    for num in num_features_per_combination)
    idx = 0
    for num, (feature_type, is_sparse) in zip(num_features_per_combination,
                                              all_combinations):
        if num == 0:
            # nothing to check here - the length was asserted above and the
            # types/shape of all existing features are checked below
            continue
        feature = reduced_list[idx]
        assert feature.is_sparse() == is_sparse
        assert feature.type == feature_type
        assert feature.features.shape[-1] == num
        idx += 1
Example #3
0
    def _extract_state_features(
        self,
        sub_state: SubState,
        precomputations: Optional[MessageContainerForCoreFeaturization],
        sparse: bool = False,
    ) -> Dict[Text, List[Features]]:
        """Extracts and combines features for the attributes of a substate.

        Args:
            sub_state: the substate whose attributes should be featurized;
                the `ENTITIES` attribute is always excluded.
            precomputations: container with precomputed features; if `None`,
                no precomputed features are collected.
            sparse: passed through to `self._create_features` when the name
                attribute has to be featurized from scratch.

        Returns:
            A mapping from attribute name to its (reduced) list of features.
        """
        # Remove entities from possible attributes
        attributes = {
            attribute for attribute in sub_state.keys() if attribute != ENTITIES
        }

        if precomputations is not None:

            # Collect features for all those attributes
            attributes_to_features = precomputations.collect_features(
                sub_state, attributes=attributes
            )
            # if features for INTENT or ACTION_NAME exist,
            # they are always sparse sequence features;
            # transform them to sentence sparse features
            for attribute in (INTENT, ACTION_NAME):
                if attributes_to_features.get(attribute):
                    attributes_to_features[attribute] = (
                        self._to_sparse_sentence_features(
                            attributes_to_features[attribute]
                        )
                    )

            # Combine and sort the features:
            # Per attribute, combine features of same type and level into one Feature,
            # and (if there are any such features) store the results in a list where
            # - all the sparse features are listed first and a
            # - sequence feature is always listed before the sentence feature of the
            #   same type (sparse/not sparse).
            output = {
                attribute: Features.reduce(
                    features_list=features_list, expected_origins=None
                )
                for attribute, features_list in attributes_to_features.items()
                if len(features_list) > 0  # otherwise, following will fail
            }
        else:
            output = {}

        # Check that the name attribute has features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # nlu pipeline didn't create features for user or action
            # this might happen, for example, when we have action_name in the state
            # but it did not get featurized because only character level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse
            )
        return output