Exemple #1
0
def _join_with_sampling(combo: Sequence[Sequence[T]],
                        combo_options: ComboOptions) -> Iterable[Sequence[T]]:
    """Yield randomly sampled combinations drawn from the components of 'combo'.

    Every combination is identified by a single flat index. For
    ((hey speaker, hi speaker), (play, start)) there are 2x2=4 combinations,
    so a flat index in [0, 3] is drawn at random and then mapped back to one
    index per component via '_one_d_to_mult_d'. For instance, 0 could map to
    (0, 0), which would yield "hey speaker play", and 1 could map to (1, 0),
    which would yield "hi speaker play", etc.

    Args:
        combo: The components; one item is picked from each component to
            form a combination.

        combo_options: Supplies 'with_replacement' and 'max_sample_size'.

    Yields:
        A tuple holding one item from each component of 'combo'.
    """
    lengths = tuple(len(component) for component in combo)
    total_combinations = _mul(lengths)

    if not combo_options.with_replacement:
        if total_combinations > sys.maxsize:
            # range() bounds passed to random.sample are capped at
            # sys.maxsize, so only that many flat indices are sampled from.
            total_combinations = sys.maxsize
            draw_count = min(combo_options.max_sample_size,
                             total_combinations)

            get_logger(__name__).warning(
                'Number of possible combinations exceeds sys.maxsize.'
                ' Sampling from the subset of combinations, sys.maxsize.')

            if draw_count == sys.maxsize:  # pragma: no cover
                yield from _join_without_sampling(combo)
                return
        else:
            draw_count = min(combo_options.max_sample_size,
                             total_combinations)
            # Asking for every combination: no sampling is needed.
            if draw_count == total_combinations:
                yield from _join_without_sampling(combo)
                return
        sampled_flat_indices = tuple(
            random.sample(range(total_combinations), draw_count))
    else:
        draw_count = combo_options.max_sample_size
        sampled_flat_indices = tuple(
            random.randint(0, total_combinations - 1)
            for _ in range(draw_count))

    for flat_index in sampled_flat_indices:
        per_component_indices = _one_d_to_mult_d(flat_index, lengths)
        yield tuple(
            combo[position][choice]
            for position, choice in enumerate(per_component_indices))
Exemple #2
0
    def from_preset(cls: Type[T_PIPELINE],
                    preset: Union[str, Callable, Sequence[Union[str, Callable]]],
                    *args: Any,
                    **kwargs: Any) -> T_PIPELINE:
        """Instantiates 'Pipeline' from a preset configuration.

        There are two ways to use 'from_preset'. The simplest way is to use the
        preset's name. However, presets may have optional arguments that allow
        for more control. In that case, use a call to the preset's method, 'preset',
        with the desired arguments.

        Args:
            preset: A str that is the preset's name, a Callable that is the
                result of calling the preset's 'preset' function, or a Sequence
                of the two. The Callable form allows more control over the
                preset's behavior. If a Sequence is specified, the result of
                calling the presets' 'preset' function may only overlap in
                'combo_hooks_map' and 'expansion_hooks_map'. If there is overlap,
                functions will be applied in the order of the Sequence. An
                empty Sequence contributes no preset kwargs.

            args: See __init__ docstring.

            kwargs: See __init__ docstring.

        Raises:
            ValueError: If presets or kwargs contain the same keys, and those
                keys are not 'combo_hooks_map' or 'expansion_hooks_map'.

            KeyError: If no positional arguments are given and kwargs does
                not contain 'pattern_def_path'.

        Returns:
            An instance of Pipeline.

        Examples:
            Preset str

            >>> from pathlib import Path
            >>> from putput.pipeline import Pipeline
            >>> pattern_def_path = Path(__file__).parent.parent / 'tests' / 'doc' / 'example_pattern_definition.yml'
            >>> dynamic_token_patterns_map = {'ITEM': ('fries',)}
            >>> p = Pipeline.from_preset('IOB2',
            ...                          pattern_def_path,
            ...                          dynamic_token_patterns_map=dynamic_token_patterns_map)
            >>> generator = p.flow(disable_progress_bar=True)
            >>> for utterance, tokens, groups in generator:
            ...     print(utterance)
            ...     print(tokens)
            ...     print(groups)
            ...     break
            can she get fries can she get fries and fries
            ('B-ADD I-ADD I-ADD', 'B-ITEM', 'B-ADD I-ADD I-ADD', 'B-ITEM', 'B-CONJUNCTION', 'B-ITEM')
            ('B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM',
            'B-None', 'B-None')

            Preset function with arguments

            >>> from putput.presets import iob2
            >>> p = Pipeline.from_preset(iob2.preset(tokens_to_include=('ITEM',), groups_to_include=('ADD_ITEM',)),
            ...                          pattern_def_path,
            ...                          dynamic_token_patterns_map=dynamic_token_patterns_map)
            >>> generator = p.flow(disable_progress_bar=True)
            >>> for utterance, tokens, groups in generator:
            ...     print(utterance)
            ...     print(tokens)
            ...     print(groups)
            ...     break
            can she get fries can she get fries and fries
            ('O O O', 'B-ITEM', 'O O O', 'B-ITEM', 'O', 'B-ITEM')
            ('B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'O', 'O')
        """
        # The pattern definition path arrives either as the first positional
        # argument or as the 'pattern_def_path' keyword.
        if args:
            pattern_def = _load_pattern_def(args[0])
        else:
            pattern_def = _load_pattern_def(kwargs['pattern_def_path'])

        # Every preset callable is invoked with these two keyword arguments.
        intent_entities_kwargs = {
            '__intent_map_from_pipeline': _extract_intent_map(pattern_def),
            '__entities_from_pipeline': _extract_entities(pattern_def),
        }
        # str must be tested first: a str is itself a Sequence.
        if isinstance(preset, str):
            init_kwargs = get_preset(preset)(**intent_entities_kwargs)
        elif isinstance(preset, Sequence):
            warning = ('Presets are not guaranteed to work together. Choose presets that logically fit together. '
                       'When in doubt, check the shapes of the return values of the hooks '
                       'as well the transformations done in the handlers.')
            logger = get_logger(__name__)
            logger.warning(warning)
            # Fold the presets' kwargs starting from an explicit empty dict.
            # Relying on a NameError to detect the first iteration would also
            # swallow a NameError raised inside _merge_kwargs and leave the
            # accumulator unbound for an empty Sequence.
            accumulated_init_kwargs = {}  # type: Any
            for pre in preset: # type: ignore
                if isinstance(pre, str):
                    preset_kwargs = get_preset(pre)(**intent_entities_kwargs)
                else:
                    preset_kwargs = pre(**intent_entities_kwargs)
                accumulated_init_kwargs = _merge_kwargs(accumulated_init_kwargs, preset_kwargs)
            init_kwargs = accumulated_init_kwargs
        else:
            init_kwargs = preset(**intent_entities_kwargs)
        # Caller-supplied kwargs are merged in last, on top of the preset kwargs.
        init_kwargs = _merge_kwargs(init_kwargs, kwargs)
        return cls(*args, **init_kwargs)
Exemple #3
0
 def test_name(self) -> None:
     # The logger obtained for this module must carry the module's dotted name.
     expected_name = 'tests.unit.test_logger'
     self.assertEqual(get_logger(__name__).name, expected_name)
Exemple #4
0
 def test_stderr(self) -> None:
     # An error emitted through a logger built with stream=sys.stderr must be
     # captured by assertLogs in the 'LEVEL:name:message' form.
     stderr_logger = get_logger(__name__, stream=sys.stderr, level=logging.DEBUG)
     expected_output = ['ERROR:{}:stderr'.format(__name__)]
     with self.assertLogs(stderr_logger, level='INFO') as captured:
         stderr_logger.error('stderr')
         self.assertEqual(captured.output, expected_output)
Exemple #5
0
 def test_formatter_uses_time(self) -> None:
     # The first handler's formatter must report that it uses the record time.
     first_handler = get_logger(__name__).handlers[0]
     uses_time = first_handler.formatter.usesTime()  # type: ignore
     self.assertTrue(uses_time)
Exemple #6
0
 def test_singleton(self) -> None:
     # Requesting the same logger twice must leave exactly one handler on it
     # after each request.
     for _attempt in range(2):
         handler_count = len(get_logger(__name__).handlers)
         self.assertEqual(handler_count, 1)
Exemple #7
0
 def test_default_level(self) -> None:
     # With no explicit level argument the logger's level must be INFO.
     observed_level = get_logger(__name__).level
     self.assertEqual(observed_level, logging.INFO)