def test_marker_from_path_raises(
    tmp_path: Path, path_config_tuples: List[Tuple[Text, Any]]
):
    """Check that loading an invalid set of marker configs raises.

    Every ``(relative_path, config)`` pair is written as a YAML file below
    ``tmp_path``; loading the whole directory must then fail.

    Args:
        tmp_path: Temporary directory the config files are written into.
        path_config_tuples: Pairs of a path relative to ``tmp_path`` and the
            marker configuration to store at that path.

    Raises:
        AssertionError: (via pytest) if ``Marker.from_path`` does not raise
            ``InvalidMarkerConfig``.
    """
    for path_to_yaml, config in path_config_tuples:
        full_path = tmp_path / path_to_yaml
        # Several configs may live in the same sub-folder, and a config may sit
        # in a nested folder. Creating the folder idempotently (including its
        # parents) keeps the setup from failing before the actual check runs.
        full_path.parent.mkdir(parents=True, exist_ok=True)
        rasa.shared.utils.io.write_yaml(data=config, target=full_path)

    with pytest.raises(InvalidMarkerConfig):
        Marker.from_path(tmp_path)
def test_marker_from_config():
    """Check that a nested config is parsed into the matching marker tree.

    The config is an AND over a slot condition and an OR; the OR combines a
    positive and a negated intent condition.
    """
    and_tag = AndMarker.positive_tag()
    slot_condition = {SlotSetMarker.positive_tag(): "s1"}
    or_tag = OrMarker.positive_tag()
    intent_conditions = [
        {IntentDetectedMarker.positive_tag(): "4"},
        {IntentDetectedMarker.negated_tag(): "6"},
    ]
    config = {and_tag: [slot_condition, {or_tag: intent_conditions}]}

    root = Marker.from_config(config)

    # The top level must be the AND operator ...
    assert isinstance(root, AndMarker)
    # ... whose first operand is the slot condition ...
    slot_part = root.sub_markers[0]
    assert isinstance(slot_part, SlotSetMarker)
    # ... and whose second operand is the OR over condition markers.
    disjunction = root.sub_markers[1]
    assert isinstance(disjunction, OrMarker)
    assert all(
        isinstance(child, ConditionMarker) for child in disjunction.sub_markers
    )
def test_split_sessions(tmp_path):
    """Check how events with a single session start are split into sessions.

    The event list contains exactly one session start, so exactly one session
    is expected, and the first element of that session entry must have as
    many items as the input event list.
    """
    events = [
        ActionExecuted(ACTION_SESSION_START_NAME),
        SessionStarted(),
        UserUttered(intent={"name": "this-intent"}),
    ]

    sessions = Marker._split_sessions(events)

    session_count = len(sessions)
    assert session_count == 1

    first_session_events = sessions[0][0]
    assert len(first_session_events) == len(events)
def test_marker_from_path_adds_special_or_marker(tmp_path: Path, configs: Any):
    """Check that markers loaded from a path are wrapped in the special OR.

    Args:
        tmp_path: Temporary directory that receives the config file.
        configs: Marker configuration written to ``config.yml``; its length
            must equal the number of loaded sub-markers.
    """
    target_file = tmp_path / "config.yml"
    rasa.shared.utils.io.write_yaml(data=configs, target=target_file)

    combined = Marker.from_path(tmp_path)

    # The loader wraps everything in one OR marker with the reserved name.
    assert isinstance(combined, OrMarker)
    assert combined.name == Marker.ANY_MARKER

    children = combined.sub_markers
    assert len(children) == len(configs)
    assert all(isinstance(child, IntentDetectedMarker) for child in children)
def test_marker_from_path_only_reads_yamls(tmp_path: Path):
    """Check that only files with a YAML suffix are picked up by the loader.

    One config file is written per suffix; each defines a uniquely named
    marker, so the names of the loaded markers reveal which files were read.
    """
    suffixes = [("yaml", True), ("yml", True), ("yaeml", False), ("config", False)]

    for idx, (suffix, _) in enumerate(suffixes):
        marker_definition = {IntentDetectedMarker.positive_tag(): "intent"}
        config_file = tmp_path / f"config-{idx}.{suffix}"
        rasa.shared.utils.io.write_yaml(
            data={f"marker-{idx}": marker_definition}, target=config_file
        )

    loaded = Marker.from_path(tmp_path)

    expected_count = sum(1 for _, allowed in suffixes if allowed)
    assert len(loaded.sub_markers) == expected_count

    expected_names = {
        f"marker-{idx}" for idx, (_, allowed) in enumerate(suffixes) if allowed
    }
    loaded_names = {sub_marker.name for sub_marker in loaded.sub_markers}
    assert loaded_names == expected_names
def test_marker_depth(marker: Marker, expected_depth: int):
    """Check that the marker reports the expected maximum depth.

    Args:
        marker: The marker whose depth is queried.
        expected_depth: The value ``marker.max_depth()`` must return.
    """
    actual_depth = marker.max_depth()
    assert actual_depth == expected_depth
def test_marker_validation_raises(config: Any):
    """Check that building a marker from the given config is rejected.

    Args:
        config: A marker configuration for which ``Marker.from_config`` is
            expected to raise ``InvalidMarkerConfig``.
    """
    expect_invalid_config = pytest.raises(InvalidMarkerConfig)
    with expect_invalid_config:
        Marker.from_config(config)
def _collect_parameters(
    marker: Marker, condition_type: Type[ConditionMarker]
) -> Set[Text]:
    """Gather the ``text`` of every flattened sub-marker of a given type.

    Args:
        marker: The marker whose ``flatten()`` output is inspected.
        condition_type: Only sub-markers that are instances of this type
            contribute their ``text``.

    Returns:
        The distinct ``text`` values of all matching sub-markers.
    """
    parameters: Set[Text] = set()
    for node in marker.flatten():
        if isinstance(node, condition_type):
            parameters.add(node.text)
    return parameters
def _run_markers(
    seed: Optional[int],
    count: Optional[int],
    endpoint_config: Path,
    domain_path: Optional[Text],
    strategy: Text,
    config: Path,
    output_filename: Path,
    stats_file_prefix: Optional[Path] = None,
) -> None:
    """Extract markers from the trackers of a tracker store.

    Loads the marker definitions, optionally validates them against a domain,
    reports telemetry about the parsed markers and finally evaluates the
    markers on the trackers selected by the given strategy.

    Args:
        seed: (Optional) Seed for the random number generator used by the
            'sample' strategy.
        count: (Optional) Number of trackers to extract from (for any
            strategy except 'all').
        endpoint_config: Path to the endpoint configuration defining the
            tracker store to use.
        domain_path: Path to the domain specification used to validate the
            marker definitions. Validation is skipped if it is not given.
        strategy: Strategy used to select the trackers to extract from.
        config: Path to the markers definition file to use.
        output_filename: Path the extracted markers are written to.
        stats_file_prefix: (Optional) Prefix used to build the paths of the
            statistics files. It must consist of the path to the folder where
            those files should be stored and the common file prefix, e.g.
            '<path-to-stats-folder>/statistics'. Statistics derived from all
            marker extractions will then be stored in
            '<path-to-stats-folder>/statistics-overall.csv', while the
            statistics computed per session will be stored in
            '<path-to-stats-folder>/statistics-per-session.csv'.
    """
    # NOTE(review): `only_extract` is reported as True when a stats prefix IS
    # given - confirm that this matches the intended telemetry semantics.
    telemetry.track_markers_extraction_initiated(
        strategy=strategy,
        only_extract=stats_file_prefix is not None,
        seed=seed is not None,
        count=count,
    )

    domain = None
    if domain_path:
        domain = Domain.load(domain_path)
    markers = Marker.from_path(config)

    # Without a domain there is nothing to validate against.
    definitions_valid = not domain or markers.validate_against_domain(domain)
    if not definitions_valid:
        rasa.shared.utils.cli.print_error_and_exit(
            "Validation errors were found in the markers definition. "
            "Please see errors listed above and fix before running again."
        )

    # Telemetry: all loaded markers are combined with one virtual OR over all
    # markers, so that extra level is subtracted from the reported depth.
    num_markers = len(markers.sub_markers)
    max_depth = markers.max_depth() - 1

    # The branching factor is the largest number of operands of any operator
    # found below the top-level markers (0 if there is no operator at all).
    operand_counts = [
        len(node.sub_markers)
        for top_level_marker in markers.sub_markers
        for node in top_level_marker.flatten()
        if isinstance(node, OperatorMarker)
    ]
    branching_factor = max(operand_counts, default=0)

    telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

    tracker_loader = _create_tracker_loader(
        endpoint_config, strategy, domain, count, seed
    )

    def _stats_path(suffix: Text) -> Optional[Path]:
        """Build the stats file path for `suffix`, or None without a prefix."""
        if not stats_file_prefix:
            return None
        return stats_file_prefix.parent / (stats_file_prefix.name + suffix)

    try:
        markers.evaluate_trackers(
            trackers=tracker_loader.load(),
            output_file=output_filename,
            session_stats_file=_stats_path(STATS_SESSION_SUFFIX),
            overall_stats_file=_stats_path(STATS_OVERALL_SUFFIX),
        )
    except (FileExistsError, NotADirectoryError) as error:
        rasa.shared.utils.cli.print_error_and_exit(message=str(error))