def _join_with_sampling(combo: Sequence[Sequence[T]], combo_options: ComboOptions) -> Iterable[Sequence[T]]:
    """Yield randomly chosen combinations, one item taken from each component.

    Given ((hey speaker, hi speaker), (play, start)), there are 4 possible
    combinations (2x2=4). A random number in [0, 3] is chosen to represent a
    random combination, and that number is mapped back to the indices that
    make up the combination. For instance, 0 could map to (0, 0), which would
    yield "hey speaker play"; 1 could map to (1, 0), which would yield
    "hi speaker play", etc.

    Args:
        combo: The components to combine; each component is a sequence of
            candidate items.
        combo_options: Sampling configuration. Reads 'with_replacement' and
            'max_sample_size'.

    Yields:
        A tuple holding one item from each component of 'combo'.
    """
    lengths = tuple(len(component) for component in combo)
    total = _mul(lengths)

    if combo_options.with_replacement:
        # With replacement, the same flat index may be drawn more than once.
        draws = combo_options.max_sample_size
        chosen = tuple(random.randint(0, total - 1) for _ in range(draws))
    else:
        if total > sys.maxsize:
            # Restrict the population to the first sys.maxsize flat indices
            # and tell the user that only a subset is being sampled.
            total = sys.maxsize
            draws = min(combo_options.max_sample_size, total)
            get_logger(__name__).warning(
                'Number of possible combinations exceeds sys.maxsize.'
                ' Sampling from the subset of combinations, sys.maxsize.')
            if draws == sys.maxsize: # pragma: no cover
                yield from _join_without_sampling(combo)
                return
        else:
            draws = min(combo_options.max_sample_size, total)
            if draws == total:
                # Every combination was requested, so sampling is skipped
                # and the non-sampling join is used instead.
                yield from _join_without_sampling(combo)
                return
        chosen = tuple(random.sample(range(total), draws))

    for flat_index in chosen:
        # Translate the flat index into one item index per component.
        positions = _one_d_to_mult_d(flat_index, lengths)
        yield tuple(combo[slot][position] for slot, position in enumerate(positions))
def from_preset(cls: Type[T_PIPELINE],
                preset: Union[str, Callable, Sequence[Union[str, Callable]]],
                *args: Any,
                **kwargs: Any) -> T_PIPELINE:
    """Instantiates 'Pipeline' from a preset configuration.

    There are two ways to use 'from_preset'. The simplest way is to use the
    preset's name. However, presets may have optional arguments that allow
    for more control. In that case, use a call to the preset's method, 'preset',
    with the desired arguments.

    Args:
        preset: A str that is the preset's name, a Callable that is the
            result of calling the preset's 'preset' function, or a Sequence
            of the two. The Callable form allows more control over the
            preset's behavior. If a Sequence is specified, the result of
            calling the presets' 'preset' function may only overlap in
            'combo_hooks_map' and 'expansion_hooks_map'. If there is overlap,
            functions will be applied in the order of the Sequence.
        args: See __init__ docstring.
        kwargs: See __init__ docstring.

    Raises:
        ValueError: If presets or kwargs contain the same keys, and those
            keys are not 'combo_hooks_map' or 'expansion_hooks_map', or if
            'preset' is an empty Sequence.

    Returns:
        An instance of Pipeline.

    Examples:
        Preset str

        >>> from pathlib import Path
        >>> from putput.pipeline import Pipeline
        >>> pattern_def_path = Path(__file__).parent.parent / 'tests' / 'doc' / 'example_pattern_definition.yml'
        >>> dynamic_token_patterns_map = {'ITEM': ('fries',)}
        >>> p = Pipeline.from_preset('IOB2',
        ...                          pattern_def_path,
        ...                          dynamic_token_patterns_map=dynamic_token_patterns_map)
        >>> generator = p.flow(disable_progress_bar=True)
        >>> for utterance, tokens, groups in generator:
        ...     print(utterance)
        ...     print(tokens)
        ...     print(groups)
        ...     break
        can she get fries can she get fries and fries
        ('B-ADD I-ADD I-ADD', 'B-ITEM', 'B-ADD I-ADD I-ADD', 'B-ITEM', 'B-CONJUNCTION', 'B-ITEM')
        ('B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'B-None', 'B-None')

        Preset function with arguments

        >>> from putput.presets import iob2
        >>> p = Pipeline.from_preset(iob2.preset(tokens_to_include=('ITEM',), groups_to_include=('ADD_ITEM',)),
        ...                          pattern_def_path,
        ...                          dynamic_token_patterns_map=dynamic_token_patterns_map)
        >>> generator = p.flow(disable_progress_bar=True)
        >>> for utterance, tokens, groups in generator:
        ...     print(utterance)
        ...     print(tokens)
        ...     print(groups)
        ...     break
        can she get fries can she get fries and fries
        ('O O O', 'B-ITEM', 'O O O', 'B-ITEM', 'O', 'B-ITEM')
        ('B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'B-ADD_ITEM I-ADD_ITEM I-ADD_ITEM I-ADD_ITEM', 'O', 'O')
    """
    # The pattern definition path is the first positional argument when given
    # positionally, otherwise it is expected under 'pattern_def_path'.
    if args:
        pattern_def = _load_pattern_def(args[0])
    else:
        pattern_def = _load_pattern_def(kwargs['pattern_def_path'])
    intent_entities_kwargs = {'__intent_map_from_pipeline': _extract_intent_map(pattern_def),
                              '__entities_from_pipeline': _extract_entities(pattern_def)}
    if isinstance(preset, str):
        init_kwargs = get_preset(preset)(**intent_entities_kwargs)
    elif isinstance(preset, Sequence):
        if not preset:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError('At least one preset must be specified when preset is a Sequence.')
        warning = ('Presets are not guaranteed to work together. Choose presets that logically fit together. '
                   'When in doubt, check the shapes of the return values of the hooks '
                   'as well the transformations done in the handlers.')
        logger = get_logger(__name__)
        logger.warning(warning)
        # Start from an explicit empty accumulator rather than relying on a
        # NameError to detect the first iteration; catching NameError would
        # also hide a genuine NameError raised inside _merge_kwargs.
        accumulated_init_kwargs = {} # type: dict
        for pre in preset: # type: ignore
            if isinstance(pre, str):
                preset_kwargs = get_preset(pre)(**intent_entities_kwargs)
            else:
                preset_kwargs = pre(**intent_entities_kwargs)
            accumulated_init_kwargs = _merge_kwargs(accumulated_init_kwargs, preset_kwargs)
        init_kwargs = accumulated_init_kwargs
    else:
        init_kwargs = preset(**intent_entities_kwargs)
    # Caller-supplied kwargs are merged last, on top of the preset's kwargs.
    init_kwargs = _merge_kwargs(init_kwargs, kwargs)
    return cls(*args, **init_kwargs)
def test_name(self) -> None:
    """The logger returned for this module carries the module's dotted name."""
    expected_name = 'tests.unit.test_logger'
    self.assertEqual(get_logger(__name__).name, expected_name)
def test_stderr(self) -> None:
    """An error logged through a stderr-configured logger is captured as one record."""
    stderr_logger = get_logger(__name__, stream=sys.stderr, level=logging.DEBUG)
    with self.assertLogs(stderr_logger, level='INFO') as captured:
        stderr_logger.error('stderr')
    expected_output = ['ERROR:{}:stderr'.format(__name__)]
    self.assertEqual(captured.output, expected_output)
def test_formatter_uses_time(self) -> None:
    """The formatter on the logger's first handler includes a timestamp."""
    first_handler = get_logger(__name__).handlers[0]
    self.assertTrue(first_handler.formatter.usesTime()) # type: ignore
def test_singleton(self) -> None:
    """Requesting the same logger repeatedly leaves it with exactly one handler."""
    expected_handler_count = 1
    for _attempt in range(2):
        handlers = get_logger(__name__).handlers
        self.assertEqual(len(handlers), expected_handler_count)
def test_default_level(self) -> None:
    """A logger created without an explicit level is set to INFO."""
    default_logger = get_logger(__name__)
    self.assertEqual(default_logger.level, logging.INFO)