Code example #1
File: evaluate.py Project: spawn08/rasa
# Imports this snippet relies on (module paths as in Rasa 3.x; they may
# differ across versions):
from pathlib import Path
from typing import Optional, Text

import rasa.shared.utils.cli
from rasa import telemetry
from rasa.core.evaluation.marker_base import Marker, OperatorMarker
from rasa.shared.core.domain import Domain

# STATS_SESSION_SUFFIX, STATS_OVERALL_SUFFIX and _create_tracker_loader are
# defined elsewhere in the same module.


def _run_markers(
    seed: Optional[int],
    count: Optional[int],
    endpoint_config: Path,
    domain_path: Optional[Text],
    strategy: Text,
    config: Path,
    output_filename: Path,
    stats_file_prefix: Optional[Path] = None,
) -> None:
    """Run markers algorithm over specified config and tracker store.

    Args:
        seed: (Optional) The seed to initialise the random number generator for
              use with the 'sample' strategy.
        count: (Optional) The number of trackers to extract markers from
               (applies to any strategy except 'all').
        endpoint_config: Path to the endpoint configuration defining the tracker
                         store to use.
        domain_path: Path to the domain specification to use when validating the
                     marker definitions.
        strategy: Strategy to use when selecting trackers to extract from.
        config: Path to the markers definition file to use.
        output_filename: Path to write out the extracted markers.
        stats_file_prefix: (Optional) A prefix used to create paths where files with
            statistics on the marker extraction results will be written.
            It must consist of the path to the folder where those files should be
            stored and the common file prefix, e.g. '<path-to-stats-folder>/statistics'.
            Statistics derived from all marker extractions will be stored in
            '<path-to-stats-folder>/statistics-overall.csv', while the statistics
            computed per session will be stored in
            '<path-to-stats-folder>/statistics-per-session.csv'.
    """
    telemetry.track_markers_extraction_initiated(
        strategy=strategy,
        only_extract=stats_file_prefix is not None,
        seed=seed is not None,
        count=count,
    )

    domain = Domain.load(domain_path) if domain_path else None
    markers = Marker.from_path(config)
    if domain and not markers.validate_against_domain(domain):
        rasa.shared.utils.cli.print_error_and_exit(
            "Validation errors were found in the markers definition. "
            "Please see errors listed above and fix before running again."
        )

    # Compute telemetry metrics. All loaded markers are combined under one
    # virtual OR node, so subtract that wrapper level from the depth.
    num_markers = len(markers.sub_markers)
    max_depth = markers.max_depth() - 1
    # Find the maximum branching factor across all operator markers
    branching_factor = max(
        (
            len(sub_marker.sub_markers)
            for marker in markers.sub_markers
            for sub_marker in marker.flatten()
            if isinstance(sub_marker, OperatorMarker)
        ),
        default=0,
    )
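    # Illustration: for a marker defined as and(slot_was_set, or(intent_a,
    # intent_b)), each operator node has 2 children, so the branching factor
    # is 2. (Hypothetical marker, for illustration only.)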

    telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

    tracker_loader = _create_tracker_loader(
        endpoint_config, strategy, domain, count, seed
    )

    def _append_suffix(path: Optional[Path], suffix: Text) -> Optional[Path]:
        # Derive e.g. '<prefix>-overall.csv' from the stats file prefix;
        # passes None through when no prefix was given.
        return path.parent / (path.name + suffix) if path else None

    try:
        markers.evaluate_trackers(
            trackers=tracker_loader.load(),
            output_file=output_filename,
            session_stats_file=_append_suffix(stats_file_prefix, STATS_SESSION_SUFFIX),
            overall_stats_file=_append_suffix(stats_file_prefix, STATS_OVERALL_SUFFIX),
        )
    except (FileExistsError, NotADirectoryError) as e:
        rasa.shared.utils.cli.print_error_and_exit(message=str(e))
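For orientation, here is a minimal sketch of how the function above might be called directly. In the project it is driven by Rasa's marker-evaluation CLI rather than called by hand, and every file name below is a hypothetical placeholder:

# Hypothetical invocation of _run_markers; all paths are placeholders.
_run_markers(
    seed=42,                       # only meaningful for the 'sample' strategy
    count=100,
    endpoint_config=Path("endpoints.yml"),
    domain_path="domain.yml",
    strategy="sample",
    config=Path("markers.yml"),
    output_filename=Path("extracted_markers.csv"),
    stats_file_prefix=Path("stats/statistics"),
)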
Code example #2
File: test_telemetry.py Project: zoovu/rasa
# Imports and the schema-file constant this test relies on (module paths as
# in the rasa repository; they may differ across versions):
import asyncio
import json
from unittest.mock import Mock

import jsonschema
from _pytest.monkeypatch import MonkeyPatch

from rasa import telemetry
from rasa.core.agent import Agent
from rasa.core.brokers.broker import EventBroker
from rasa.core.channels.console import CmdlineInput
from rasa.core.tracker_store import TrackerStore
from rasa.shared.importers.importer import TrainingDataImporter
from rasa.shared.nlu.training_data.training_data import TrainingData

# Defined in the test module; the path below matches the rasa repository
# layout at the time (verify against the project):
TELEMETRY_EVENTS_JSON = "docs/docs/telemetry/events.json"


async def test_events_schema(monkeypatch: MonkeyPatch, default_agent: Agent,
                             config_path: Text):
    # Enable telemetry debug mode so events are printed rather than sent, then
    # patch the printing function below to capture every reported event.
    monkeypatch.setenv("RASA_TELEMETRY_DEBUG", "true")
    monkeypatch.setenv("RASA_TELEMETRY_ENABLED", "true")

    mock = Mock()
    monkeypatch.setattr(telemetry, "print_telemetry_event", mock)

    with open(TELEMETRY_EVENTS_JSON) as f:
        schemas = json.load(f)["events"]
    # Remember the tasks already running so that later we only await the
    # telemetry tasks created by this test.
    initial = asyncio.all_tasks()
    # Generate all known backend telemetry events, and then use events.json to
    # validate their schema.
    training_data = TrainingDataImporter.load_from_config(config_path)

    with telemetry.track_model_training(training_data, "rasa"):
        await asyncio.sleep(1)

    telemetry.track_telemetry_disabled()

    telemetry.track_data_split(0.5, "nlu")

    telemetry.track_validate_files(True)

    telemetry.track_data_convert("yaml", "nlu")

    telemetry.track_tracker_export(5, TrackerStore(domain=None), EventBroker())

    telemetry.track_interactive_learning_start(True, False)

    telemetry.track_server_start([CmdlineInput()], None, None, 42, True)

    telemetry.track_project_init("tests/")

    telemetry.track_shell_started("nlu")

    telemetry.track_rasa_x_local()

    telemetry.track_visualization()

    telemetry.track_core_model_test(5, True, default_agent)

    telemetry.track_nlu_model_test(TrainingData())

    telemetry.track_markers_extraction_initiated("all", False, False, None)

    telemetry.track_markers_extracted(1)

    telemetry.track_markers_stats_computed(1)

    telemetry.track_markers_parsed_count(1, 1, 1)

    pending = asyncio.all_tasks() - initial
    await asyncio.gather(*pending)

    # 17 direct track_* calls above, plus the model-training context manager,
    # which accounts for the remaining two events: 19 in total.
    assert mock.call_count == 19

    for args, _ in mock.call_args_list:
        event = args[0]
        # `metrics_id` is automatically added to every event but is not part
        # of the schema, so we remove it before validation.
        del event["properties"]["metrics_id"]
        jsonschema.validate(instance=event["properties"],
                            schema=schemas[event["event"]])
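To make the final validation step concrete, here is a minimal, self-contained sketch of what jsonschema.validate does with one event's properties. The schema and event properties below are hypothetical stand-ins, not entries copied from events.json:

import jsonschema

# Hypothetical schema for a single event, mirroring the shape used above.
schema = {
    "type": "object",
    "properties": {"strategy": {"type": "string"}},
    "required": ["strategy"],
}

# A conforming instance passes silently.
jsonschema.validate(instance={"strategy": "all"}, schema=schema)

# A non-conforming instance raises ValidationError.
try:
    jsonschema.validate(instance={}, schema=schema)
except jsonschema.ValidationError as e:
    print(f"rejected as expected: {e.message}")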