def test_experiment_analytics_search_by_nested_filter(sagemaker_session):
    """Check a search expression that combines an experiment and a parameter filter.

    The expression restricts results to the experiment name yielded by
    ``experiment`` and to entries whose ``hp1`` parameter is greater than or
    equal to 10.
    """
    with experiment(sagemaker_session) as experiment_name:
        by_experiment = {
            "Name": "Parents.ExperimentName",
            "Operator": "Equals",
            "Value": experiment_name,
        }
        by_hp1 = {
            "Name": "Parameters.hp1",
            "Operator": "GreaterThanOrEqualTo",
            "Value": "10",
        }
        search_exp = {"Filters": [by_experiment, by_hp1]}

        analytics = ExperimentAnalytics(
            sagemaker_session=sagemaker_session, search_expression=search_exp
        )

        expected_columns = [
            "TrialComponentName",
            "DisplayName",
            "hp1",
            "Trials",
            "Experiments",
        ]
        assert list(analytics.dataframe().columns) == expected_columns

        # TODO [owen-t] Replace with == 10 and put test in retry block
        assert len(analytics.dataframe()) > 5
def test_experiment_analytics(sagemaker_session):
    """Check the column layout of the dataframe built for a single experiment."""
    expected_columns = ["TrialComponentName", "DisplayName", "hp1"]

    with experiment(sagemaker_session) as experiment_name:
        analytics = ExperimentAnalytics(
            experiment_name=experiment_name,
            sagemaker_session=sagemaker_session,
        )
        assert list(analytics.dataframe().columns) == expected_columns
def test_trial_analytics_dataframe_filter_trials_search_exp_with_sort(mock_session):
    """Check the search call made when a name, an expression and a sort are given.

    The expected search expression is the caller's filter followed by a
    ``Parents.ExperimentName`` filter for the experiment name, and the sort
    field and order are expected as ``SortBy`` / ``SortOrder``.
    """
    search_mock = mock_session.sagemaker_client.search
    search_mock.return_value = {"Results": []}

    tag_filter = {"Name": "Tags.someTag", "Operator": "Equals", "Value": "someValue"}
    search_exp = {"Filters": [tag_filter]}

    analytics = ExperimentAnalytics(
        experiment_name="someExperiment",
        search_expression=search_exp,
        sort_by="Tags.someTag",
        sort_order="Ascending",
        sagemaker_session=mock_session,
    )
    analytics.dataframe()

    # Spelled out as fresh literals so the expectation does not alias the input.
    expected_search_exp = {
        "Filters": [
            {"Name": "Tags.someTag", "Operator": "Equals", "Value": "someValue"},
            {"Name": "Parents.ExperimentName", "Operator": "Equals", "Value": "someExperiment"},
        ]
    }
    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
        SortBy="Tags.someTag",
        SortOrder="Ascending",
    )
def test_experiment_analytics_pagination(sagemaker_session):
    """Check that the analytics dataframe holds more than one page of search results.

    Creates an experiment with 20 trials, each associated with one trial
    component that carries an ``hp1`` parameter, then checks the dataframe's
    columns and that it has more than 10 rows.

    The created resource names are handed to ``_delete_resources`` in a
    ``finally`` block so they are not left behind when the test finishes or an
    assertion fails.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup
    experiment_name = "experiment" + str(uuid.uuid4())
    try:
        sm.create_experiment(ExperimentName=experiment_name)

        # Search returns 10 results by default. Add 20 trials to verify pagination.
        for i in range(20):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            trials[trial_name] = trial_component_name
            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        time.sleep(15)  # wait for search to get updated TODO [owen-t]: Replace with retry

        analytics = ExperimentAnalytics(
            experiment_name=experiment_name, sagemaker_session=sagemaker_session
        )
        assert list(analytics.dataframe().columns) == [
            "TrialComponentName",
            "DisplayName",
            "hp1",
        ]

        # TODO [owen-t] Replace with == 20 and put test in retry block
        assert len(analytics.dataframe()) > 10
    finally:
        # Same call, with the same argument shape, that experiment_with_artifacts makes.
        # NOTE(review): _delete_resources is defined outside this view - confirm.
        _delete_resources(sm, experiment_name, trials)
def test_experiment_analytics(sagemaker_session):
    """Check the column layout of the dataframe built for a small experiment.

    Creates an experiment with 5 trials, each associated with one trial
    component that carries an ``hp1`` parameter, then checks the dataframe's
    columns.

    The created resource names are handed to ``_delete_resources`` in a
    ``finally`` block so they are not left behind when the test finishes or the
    assertion fails.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup
    experiment_name = "experiment-" + str(uuid.uuid4())
    try:
        sm.create_experiment(ExperimentName=experiment_name)

        for i in range(5):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            trials[trial_name] = trial_component_name
            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        time.sleep(15)  # wait for search to get updated

        analytics = ExperimentAnalytics(
            experiment_name=experiment_name, sagemaker_session=sagemaker_session
        )
        assert list(analytics.dataframe().columns) == [
            "TrialComponentName",
            "DisplayName",
            "hp1",
        ]
    finally:
        # Same call, with the same argument shape, that experiment_with_artifacts makes.
        # NOTE(review): _delete_resources is defined outside this view - confirm.
        _delete_resources(sm, experiment_name, trials)
def test_trial_analytics_dataframe_search_pagination(mock_session):
    """Check that a ``NextToken`` in a search response leads to a second search call.

    The mocked client serves two pages with one trial component each. The
    dataframe must contain both rows, and the second search call must carry
    the token returned with the first page.
    """
    first_page = {
        "Results": [{"TrialComponent": trial_component("trial-1")}],
        "NextToken": "nextToken",
    }
    second_page = {"Results": [{"TrialComponent": trial_component("trial-2")}]}
    mock_session.sagemaker_client.search.side_effect = [first_page, second_page]

    analytics = ExperimentAnalytics(experiment_name="experiment1", sagemaker_session=mock_session)

    # One (column name, column values) pair per expected column, in column order.
    expected_columns = [
        ("TrialComponentName", ["trial-1", "trial-2"]),
        ("DisplayName", ["Training", "Training"]),
        ("SourceArn", ["some-source-arn", "some-source-arn"]),
        ("hp1", [1.0, 1.0]),
        ("hp2", ["abc", "abc"]),
        ("metric1 - Min", [3.0, 3.0]),
        ("metric1 - Max", [5.0, 5.0]),
        ("metric1 - Avg", [4.0, 4.0]),
        ("metric1 - StdDev", [1.0, 1.0]),
        ("metric1 - Last", [2.0, 2.0]),
        ("metric1 - Count", [2.0, 2.0]),
        ("metric2 - Min", [8.0, 8.0]),
        ("metric2 - Max", [10.0, 10.0]),
        ("metric2 - Avg", [9.0, 9.0]),
        ("metric2 - StdDev", [0.05, 0.05]),
        ("metric2 - Last", [7.0, 7.0]),
        ("metric2 - Count", [2.0, 2.0]),
        ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
        ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
        ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
        ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
        ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
        ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
        ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
        ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
    ]
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(expected_columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}

    first_call = mock.call(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
    )
    follow_up_call = mock.call(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
        NextToken="nextToken",
    )
    mock_session.sagemaker_client.search.assert_has_calls([first_call, follow_up_call])
def experiment_with_artifacts(sagemaker_session):
    """Yield the name of a new experiment whose trial components carry artifacts.

    Creates an experiment with 20 trials. Each trial gets one trial component
    with an ``hp1`` parameter, one input artifact and one output artifact.
    Before yielding, the function waits and then polls the analytics dataframe
    up to three times until it has at least one column. The created resource
    names are passed to ``_delete_resources`` in a ``finally`` block.

    NOTE(review): callers use this in a ``with`` statement, so a
    ``contextmanager`` decorator is presumably applied above this definition -
    it is not visible here, confirm.
    """
    client = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup
    experiment_name = "experiment-" + str(uuid.uuid4())

    try:
        client.create_experiment(ExperimentName=experiment_name)

        # Search returns 10 results by default. Add 20 trials to verify pagination.
        for hp1_value in range(20):
            trial_name = "trial-" + str(uuid.uuid4())
            client.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            component_name = "tc-" + str(uuid.uuid4())
            trials[trial_name] = component_name
            client.create_trial_component(
                TrialComponentName=component_name, DisplayName="Training"
            )

            input_artifacts = {
                "inputArtifacts1": {"MediaType": "text/csv", "Value": "s3:/foo/bar1"}
            }
            output_artifacts = {
                "outputArtifacts1": {"MediaType": "text/plain", "Value": "s3:/foo/bar2"}
            }
            client.update_trial_component(
                TrialComponentName=component_name,
                Parameters={"hp1": {"NumberValue": hp1_value}},
                InputArtifacts=input_artifacts,
                OutputArtifacts=output_artifacts,
            )
            client.associate_trial_component(
                TrialComponentName=component_name, TrialName=trial_name
            )
            time.sleep(1)

        time.sleep(15)  # wait for search to get updated

        # allow search time thrice
        for _attempt in range(3):
            probe = ExperimentAnalytics(
                experiment_name=experiment_name, sagemaker_session=sagemaker_session
            )
            if len(probe.dataframe().columns) > 0:
                break
            time.sleep(15)

        yield experiment_name
    finally:
        _delete_resources(client, experiment_name, trials)
def test_trial_analytics_dataframe_selected_hyperparams(mock_session):
    """Check the dataframe built when ``parameter_names`` is ``["hp2"]``.

    The expected frame has an ``hp2`` column and no ``hp1`` column, alongside
    the metric, artifact, ``Trials`` and ``Experiments`` columns.
    """
    search_results = [
        {"TrialComponent": trial_component("trial-1")},
        {"TrialComponent": trial_component("trial-2")},
    ]
    mock_session.sagemaker_client.search.return_value = {"Results": search_results}

    analytics = ExperimentAnalytics(
        experiment_name="experiment1",
        parameter_names=["hp2"],
        sagemaker_session=mock_session,
    )

    # One (column name, column values) pair per expected column, in column order.
    expected_columns = [
        ("TrialComponentName", ["trial-1", "trial-2"]),
        ("DisplayName", ["Training", "Training"]),
        ("SourceArn", ["some-source-arn", "some-source-arn"]),
        ("hp2", ["abc", "abc"]),
        ("metric1 - Min", [3.0, 3.0]),
        ("metric1 - Max", [5.0, 5.0]),
        ("metric1 - Avg", [4.0, 4.0]),
        ("metric1 - StdDev", [1.0, 1.0]),
        ("metric1 - Last", [2.0, 2.0]),
        ("metric1 - Count", [2.0, 2.0]),
        ("metric2 - Min", [8.0, 8.0]),
        ("metric2 - Max", [10.0, 10.0]),
        ("metric2 - Avg", [9.0, 9.0]),
        ("metric2 - StdDev", [0.05, 0.05]),
        ("metric2 - Last", [7.0, 7.0]),
        ("metric2 - Count", [2.0, 2.0]),
        ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
        ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
        ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
        ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
        ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
        ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
        ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
        ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
        ("Trials", [["trial1"], ["trial1"]]),
        ("Experiments", [["experiment1"], ["experiment1"]]),
    ]
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(expected_columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}
    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
    )
def test_experiment_analytics_pagination(sagemaker_session):
    """Check that the analytics dataframe for the experiment has more than 10 rows."""
    expected_columns = ["TrialComponentName", "DisplayName", "hp1"]

    with experiment(sagemaker_session) as experiment_name:
        analytics = ExperimentAnalytics(
            experiment_name=experiment_name,
            sagemaker_session=sagemaker_session,
        )

        assert list(analytics.dataframe().columns) == expected_columns

        # TODO [owen-t] Replace with == 20 and put test in retry block
        assert len(analytics.dataframe()) > 10
def test_trial_analytics_dataframe_filter_trials_search_exp_only(mock_session):
    """Check the search call made when only a search expression is supplied.

    With no experiment name given, the search is expected to receive the
    caller's expression as-is.
    """
    mock_session.sagemaker_client.search.return_value = {"Results": []}

    tag_filter = {"Name": "Tags.someTag", "Operator": "Equals", "Value": "someValue"}
    search_exp = {"Filters": [tag_filter]}

    ExperimentAnalytics(
        search_expression=search_exp,
        sagemaker_session=mock_session,
    ).dataframe()

    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent",
        SearchExpression=search_exp,
    )
def test_experiment_analytics_artifacts(sagemaker_session):
    """Check that input and output artifacts appear as dataframe columns."""
    expected_columns = [
        "TrialComponentName",
        "DisplayName",
        "hp1",
        "inputArtifacts1 - MediaType",
        "inputArtifacts1 - Value",
        "outputArtifacts1 - MediaType",
        "outputArtifacts1 - Value",
    ]

    with experiment_with_artifacts(sagemaker_session) as experiment_name:
        analytics = ExperimentAnalytics(
            experiment_name=experiment_name,
            sagemaker_session=sagemaker_session,
        )
        assert list(analytics.dataframe().columns) == expected_columns
def test_experiment_analytics_search_by_nested_filter(sagemaker_session):
    """Check a search expression that combines an experiment and a parameter filter.

    Creates an experiment with 20 trials, each associated with one trial
    component whose ``hp1`` parameter is the loop index (0..19). It then
    searches for entries of that experiment with ``hp1`` greater than or equal
    to 10 and checks the dataframe's columns and that it has more than 5 rows.

    The created resource names are handed to ``_delete_resources`` in a
    ``finally`` block so they are not left behind when the test finishes or an
    assertion fails.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup
    experiment_name = "experiment" + str(uuid.uuid4())
    try:
        sm.create_experiment(ExperimentName=experiment_name)

        for i in range(20):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            trials[trial_name] = trial_component_name
            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        time.sleep(15)  # wait for search to get updated TODO [owen-t]: Replace with retry

        search_exp = {
            "Filters": [
                {
                    "Name": "Parents.ExperimentName",
                    "Operator": "Equals",
                    "Value": experiment_name,
                },
                {
                    "Name": "Parameters.hp1",
                    "Operator": "GreaterThanOrEqualTo",
                    "Value": "10",
                },
            ]
        }
        analytics = ExperimentAnalytics(
            sagemaker_session=sagemaker_session, search_expression=search_exp
        )
        assert list(analytics.dataframe().columns) == [
            "TrialComponentName",
            "DisplayName",
            "hp1",
        ]

        # TODO [owen-t] Replace with == 10 and put test in retry block
        assert len(analytics.dataframe()) > 5
    finally:
        # Same call, with the same argument shape, that experiment_with_artifacts makes.
        # NOTE(review): _delete_resources is defined outside this view - confirm.
        _delete_resources(sm, experiment_name, trials)
def test_trial_analytics_dataframe_selected_metrics(mock_session):
    """Check the dataframe built when ``metric_names`` is ``["metric1"]``.

    The expected frame has the ``metric1`` statistic columns and no
    ``metric2`` or artifact columns.
    """
    search_results = [
        {"TrialComponent": trial_component("trial-1")},
        {"TrialComponent": trial_component("trial-2")},
    ]
    mock_session.sagemaker_client.search.return_value = {"Results": search_results}

    analytics = ExperimentAnalytics(
        experiment_name="experiment1",
        metric_names=["metric1"],
        sagemaker_session=mock_session,
    )

    # One (column name, column values) pair per expected column, in column order.
    expected_columns = [
        ("TrialComponentName", ["trial-1", "trial-2"]),
        ("DisplayName", ["Training", "Training"]),
        ("SourceArn", ["some-source-arn", "some-source-arn"]),
        ("hp1", [1.0, 1.0]),
        ("hp2", ["abc", "abc"]),
        ("metric1 - Min", [3.0, 3.0]),
        ("metric1 - Max", [5.0, 5.0]),
        ("metric1 - Avg", [4.0, 4.0]),
        ("metric1 - StdDev", [1.0, 1.0]),
        ("metric1 - Last", [2.0, 2.0]),
        ("metric1 - Count", [2.0, 2.0]),
    ]
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(expected_columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}
    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
    )
def test_trial_analytics_dataframe_throws_error_if_no_filter_specified(mock_session):
    """Constructing the analytics object with only a session must raise ``ValueError``."""
    session_only = {"sagemaker_session": mock_session}
    with pytest.raises(ValueError):
        ExperimentAnalytics(**session_only)