def test_e2e_csv_blockifier_plugin():
    """Deploy the CSV blockifier plugin and check that an uploaded CSV becomes tagged blocks."""
    client = get_steamship_client()
    csv_blockifier_plugin_path = PLUGINS_PATH / "blockifiers" / "csv_blockifier.py"
    # TODO (enias): Derive this from Config
    version_config_template = {
        "text_column": {"type": "string"},
        "tag_columns": {"type": "string"},
        "tag_kind": {"type": "string"},
    }
    instance_config = {  # Has to match up
        "text_column": "Message",
        "tag_columns": "Category",
        "tag_kind": "Intent",
    }
    with deploy_plugin(
        client,
        csv_blockifier_plugin_path,
        "blockifier",
        version_config_template=version_config_template,
        instance_config=instance_config,
    ) as (plugin, version, instance):
        with upload_file(client, "utterances.csv") as file:
            # A freshly uploaded file has not been blockified yet
            assert len(file.refresh().data.blocks) == 0
            file.blockify(plugin_instance=instance.handle).wait()
            # Check the number of blocks
            all_blocks = file.refresh().data.blocks
            assert len(all_blocks) == 5
            # Every block must carry at least one fully-populated tag
            for current_block in all_blocks:
                assert current_block.tags is not None
                assert len(current_block.tags) > 0
                for current_tag in current_block.tags:
                    assert current_tag.name is not None
                    assert current_tag.kind is not None
            file.delete()
def test_get_training_parameters():
    """Any trainable plugin needs a Python+Lambda component that can report its trainable params.

    This tests that all the plumbing works for that to be returned.
    """
    client = get_steamship_client()
    tagger_path = PLUGINS_PATH / "taggers" / "plugin_trainable_tagger.py"
    # Now make a trainable tagger to train on those tags
    with deploy_plugin(
        client,
        tagger_path,
        "tagger",
        training_platform=HostingType.LAMBDA,
    ) as (tagger, tagger_version, tagger_instance):  # snake_case for consistency with sibling tests
        training_request = TrainingParameterPluginInput(plugin_instance=tagger_instance.handle)
        res = tagger_instance.get_training_parameters(
            training_request
        )  # TODO (enias): How is this working?
        assert res.data is not None
        params = res.data
        assert params.training_epochs is not None
        assert params.training_epochs == TRAINING_PARAMETERS.training_epochs
        # Float field may round-trip through serialization, so compare with a tolerance
        assert math.isclose(
            params.testing_holdout_percent,
            TRAINING_PARAMETERS.testing_holdout_percent,
            abs_tol=0.0001,
        )
        assert params.training_params == TRAINING_PARAMETERS.training_params
def test_e2e_corpus_importer():
    """Deploy file + corpus importer plugins into a temporary space and import a two-file corpus."""
    client = get_steamship_client()
    corpus_importer_path = PLUGINS_PATH / "importers" / "plugin_corpus_importer.py"
    file_importer_path = PLUGINS_PATH / "importers" / "plugin_file_importer.py"
    with temporary_space(client) as space:
        with deploy_plugin(client, file_importer_path, "fileImporter", space_id=space.id) as (
            _,
            _,
            fi_instance,
        ):
            with deploy_plugin(
                client, corpus_importer_path, "corpusImporter", space_id=space.id
            ) as (plugin, version, instance):
                import_request = CorpusImportRequest(
                    type="file",
                    value="dummy-value",
                    plugin_instance=instance.handle,
                    file_importer_plugin_instance=fi_instance.handle,
                )
                import_task = client.post(
                    "plugin/instance/importCorpus",
                    import_request,
                    expect=CorpusImportResponse,
                    space_id=space.id,
                )
                import_task.wait()

                # We should now have two files!
                listing = File.list(client, space_id=space.id).data
                assert listing.files is not None
                assert len(listing.files) == 2
                # Each imported file must contain exactly the test document
                for imported_file in listing.files:
                    raw_bytes = imported_file.raw().data
                    assert raw_bytes.decode("utf-8") == TEST_DOC
                    imported_file.delete()
def test_e2e_blockifier_plugin():
    """Deploy the basic blockifier and verify it splits a test file into four blocks."""
    client = get_steamship_client()
    blockifier_path = PLUGINS_PATH / "blockifiers" / "blockifier.py"
    with deploy_plugin(client, blockifier_path, "blockifier") as (
        plugin,
        version,
        instance,
    ):
        test_file = File.create(client=client, content="This is a test.").data
        # No blocks exist until the blockifier runs
        assert len(test_file.refresh().data.blocks) == 0
        test_file.blockify(plugin_instance=instance.handle).wait()
        assert len(test_file.refresh().data.blocks) == 4
        test_file.delete()
def test_e2e_tagger():
    """Deploy the parser tagger and verify it round-trips a simple document into one block."""
    client = get_steamship_client()
    # TODO (enias): Use Enum for plugin type
    parser_path = PLUGINS_PATH / "taggers" / "plugin_parser.py"
    with deploy_plugin(client, parser_path, "tagger") as (plugin, version, instance):
        test_doc = "Hi there"
        tag_task = instance.tag(doc=test_doc)
        tag_task.wait()
        assert tag_task.error is None
        assert tag_task.data is not None
        # The whole document should come back as a single block with unchanged text
        assert len(tag_task.data.file.blocks) == 1
        assert tag_task.data.file.blocks[0].text == test_doc
        # Let's try it on a file. This is the same test we run on the Swift test parser.
        # Since the python test parser is implemented to behave the same, we can reuse it!
        tag_file(client, instance.handle)
def test_e2e_importer(client: Steamship):
    """Deploy the test FileImporter and verify an imported file's raw bytes equal TEST_DOC."""
    file_importer_path = PLUGINS_PATH / "importers" / "plugin_file_importer.py"
    with deploy_plugin(client, file_importer_path, "fileImporter") as (
        plugin,
        version,
        instance,
    ):
        # The test FileImporter should always return a string file with contents TEST_DOC
        imported_file = File.create(
            client=client, content="This is a test.", plugin_instance=instance.handle
        ).data
        # Now fetch the data from Steamship and assert that it is the SAME as the data the FileImporter creates
        raw_bytes = imported_file.raw().data
        assert raw_bytes.decode("utf-8") == TEST_DOC
        imported_file.delete()
def test_e2e_corpus_export(client: Steamship):
    """Blockify a CSV corpus and then export it through the exporter plugin instance."""
    # TODO (enias): Derive this from Config
    version_config_template = {
        "text_column": {"type": "string"},
        "tag_columns": {"type": "string"},
        "tag_kind": {"type": "string"},
    }
    instance_config = {  # Has to match up
        "text_column": "Message",
        "tag_columns": "Category",
        "tag_kind": "Intent",
    }
    exporter_plugin_r = PluginInstance.create(
        client=client,
        handle=EXPORTER_HANDLE,
        plugin_handle=EXPORTER_HANDLE,
        upsert=True,
    )
    assert exporter_plugin_r.data is not None
    exporter_plugin = exporter_plugin_r.data
    assert exporter_plugin.handle is not None

    _input = ExportPluginInput(handle="default", type="file")
    csv_blockifier_path = PLUGINS_PATH / "blockifiers" / "csv_blockifier.py"

    # Make a blockifier which will generate our trainable corpus
    with deploy_plugin(
        client,
        csv_blockifier_path,
        "blockifier",
        version_config_template=version_config_template,
        instance_config=instance_config,
    ) as (plugin, version, instance):
        with upload_file(client, "utterances.csv") as file:
            assert len(file.refresh().data.blocks) == 0
            # Use the plugin we just registered
            file.blockify(plugin_instance=instance.handle).wait()
            assert len(file.refresh().data.blocks) == 5

            # Now export the corpus
            raw_data_r = exporter_plugin.export(_input)
            assert raw_data_r is not None
            # The results of a corpus exporter are MD5 encoded!
            _ = raw_data_r.data
def test_e2e_third_party_trainable_tagger_lambda_training():
    """Train a tagger wrapping a third-party model on Lambda (zero data) and then use it to tag."""
    client = get_steamship_client()
    space_response = Space.get(client)  # TODO (enias): Remove; snake_case for consistency
    assert space_response.data is not None
    exporter_plugin_r = PluginInstance.create(
        client=client,
        handle=EXPORTER_HANDLE,
        plugin_handle=EXPORTER_HANDLE,
        upsert=True,  # Don't care if it already exists
    )
    assert exporter_plugin_r.data is not None
    exporter_plugin = exporter_plugin_r.data
    assert exporter_plugin.handle is not None

    third_party_trainable_tagger_path = (
        PLUGINS_PATH / "taggers" / "plugin_third_party_trainable_tagger.py"
    )

    # Note that we're going to do the below training on ZERO data for simplicity.
    # The particular test model doesn't actually incorporate any data given to it at training time,
    # so it would just slow the test down to create, blockify, and export a training corpus.
    with deploy_plugin(
        client,
        third_party_trainable_tagger_path,
        "tagger",
        training_platform=HostingType.LAMBDA,
    ) as (tagger, tagger_version, tagger_instance):
        # Now train the plugin
        training_request = TrainingParameterPluginInput(
            plugin_instance=tagger_instance.handle,
            export_plugin_input=ExportPluginInput(
                plugin_instance=exporter_plugin.handle, type="file", query="all"
            ),
        )
        train_result = tagger_instance.train(training_request)
        train_result.wait()
        assert train_result.data is not None
        output = train_result.data
        assert output.training_complete
        assert output.training_reference_data is not None
        # The mock third-party service records three check-ins during training
        assert output.training_reference_data["num_checkins"] == 3

        logging.info("Waiting 15 seconds for instance to deploy.")
        import time  # Local import kept: the file-level import block is outside this chunk

        time.sleep(15)

        # Now we'll attempt to USE this plugin. This plugin's behavior is to simply tag every block
        # with the parameters `MockClient.LABELS`
        # First we'll create a file
        test_doc = "Hi there"
        res = tagger_instance.tag(doc=test_doc)
        res.wait()
        assert res.error is None
        assert res.data is not None
        assert res.data.file is not None
        # File-level tags stay empty; only blocks are tagged by this plugin
        assert not res.data.file.tags
        assert res.data.file.blocks is not None
        assert len(res.data.file.blocks) > 0
        for block in res.data.file.blocks:
            assert block.tags is not None
            assert sorted([tag.name for tag in block.tags]) == sorted(MockClient.LABELS)
def test_e2e_trainable_tagger_lambda_training(client: Steamship):
    """End-to-end: blockify a CSV corpus, train a keyword tagger on Lambda, then verify both the
    persisted model checkpoint and the trained tagger's output."""
    version_config_template = dict(
        text_column=dict(type="string"),
        tag_columns=dict(type="string"),
        tag_kind=dict(type="string"),
    )
    instance_config = dict(text_column="Message", tag_columns="Category", tag_kind="Intent")
    exporter_plugin_r = PluginInstance.create(
        client=client,
        handle=EXPORTER_HANDLE,
        plugin_handle=EXPORTER_HANDLE,
        upsert=True,
    )
    assert exporter_plugin_r.data is not None
    exporter_plugin = exporter_plugin_r.data
    assert exporter_plugin.handle is not None

    csv_blockifier_path = PLUGINS_PATH / "blockifiers" / "csv_blockifier.py"
    trainable_tagger_path = PLUGINS_PATH / "taggers" / "plugin_trainable_tagger.py"

    # Make a blockifier which will generate our trainable corpus
    with deploy_plugin(
        client,
        csv_blockifier_path,
        "blockifier",
        version_config_template=version_config_template,
        instance_config=instance_config,
    ) as (plugin, version, instance):
        with upload_file(client, "utterances.csv") as file:
            assert len(file.refresh().data.blocks) == 0
            # Use the plugin we just registered
            file.blockify(plugin_instance=instance.handle).wait()
            assert len(file.refresh().data.blocks) == 5

            # Now make a trainable tagger to train on those tags
            with deploy_plugin(
                client, trainable_tagger_path, "tagger", training_platform=HostingType.LAMBDA
            ) as (tagger, tagger_version, tagger_instance):
                # Now train the plugin
                # NOTE(review): sibling tests pass exporter_plugin.handle here; EXPORTER_HANDLE
                # matches only because the exporter instance was upserted with that handle — confirm.
                training_request = TrainingParameterPluginInput(
                    plugin_instance=tagger_instance.handle,
                    export_plugin_input=ExportPluginInput(
                        plugin_instance=EXPORTER_HANDLE, type="file", query='kind "foo1"'
                    ),
                    training_params=dict(
                        keyword_list=KEYWORDS  # This is a key defined by the test model we're training
                    ),
                )
                train_result = tagger_instance.train(training_request)
                train_result.wait()

                # At this point, the PluginInstance will have written a parameter file to disk.
                # We should be able to retrieve it since we know that it is tagged as the `default`.
                checkpoint = ModelCheckpoint(
                    client=client,
                    handle="default",
                    plugin_instance_id=tagger_instance.id,
                )
                checkpoint_path = checkpoint.download_model_bundle()
                assert checkpoint_path.exists()
                keyword_path = Path(checkpoint_path) / TestTrainableTaggerModel.KEYWORD_LIST_FILE
                assert keyword_path.exists()
                # json.load reads straight from the file handle (no manual read + loads)
                with open(keyword_path, "r") as f:
                    params = json.load(f)
                assert params == KEYWORDS

                logging.info("Waiting 15 seconds for instance to deploy.")
                import time  # Local import kept: the file-level import block is outside this chunk

                time.sleep(15)

                # If we're here, we have verified that the plugin instance has correctly recorded
                # its parameters into the pluginData bucket under a path unique to the
                # PluginInstance/ModelCheckpoint.
                # Now we'll attempt to USE this plugin. This plugin's behavior is to simply tag any
                # file with the tags that parameterize it. Since those tags are (see above)
                # ["product", "coupon"] we should expect this tagger to apply those tags to any
                # file provided to it.

                # First we'll create a file
                test_doc = "Hi there"
                res = tagger_instance.tag(doc=test_doc)
                res.wait()
                assert res.error is None
                assert res.data is not None
                assert res.data.file is not None
                assert res.data.file.tags is not None
                assert len(res.data.file.tags) == len(KEYWORDS)
                assert sorted([tag.name for tag in res.data.file.tags]) == sorted(KEYWORDS)
def test_e2e_parser():
    """Deploy a configurable tagger and verify that two differently-configured instances each tag
    a document according to their own config."""
    client = get_steamship_client()
    tagger_plugin_path = PLUGINS_PATH / "taggers" / "plugin_configurable_tagger.py"
    config_template = {
        "tagKind": {"type": "string"},
        "tagName": {"type": "string"},
        "numberValue": {"type": "number"},
        "booleanValue": {"type": "boolean"},
    }
    instance_config1 = {
        "tagKind": "testTagKind",
        "tagName": "testTagName",
        "numberValue": 3,
        "booleanValue": True,
    }

    test_doc = "Hi there"

    def _assert_tagged_per_config(res, config):
        """Shared checks: one block equal to the input doc, one file tag matching `config`."""
        res.wait()
        assert res.error is None
        assert res.data is not None
        assert len(res.data.file.blocks) == 1
        assert res.data.file.blocks[0].text == test_doc
        # Validate configured content
        assert len(res.data.file.tags) == 1
        tag = res.data.file.tags[0]
        assert tag.name == config["tagName"]
        assert tag.kind == config["tagKind"]
        tag_value = tag.value
        assert tag_value["numberValue"] == config["numberValue"]
        assert tag_value["booleanValue"] == config["booleanValue"]

    with deploy_plugin(
        client,
        tagger_plugin_path,
        "tagger",
        version_config_template=config_template,
        instance_config=instance_config1,
    ) as (plugin, version, instance):
        _assert_tagged_per_config(instance.tag(doc=test_doc), instance_config1)

        # A second instance with a different config must tag per ITS config, not the first one's
        instance_config2 = {
            "tagKind": "testTagKind2",
            "tagName": "testTagName2",
            "numberValue": 4,
            "booleanValue": False,
        }
        instance2 = PluginInstance.create(
            client,
            plugin_id=plugin.id,
            plugin_version_id=version.id,
            config=instance_config2,
        )
        instance2.wait()
        assert instance2.error is None
        assert instance2.data is not None
        instance2 = instance2.data
        _assert_tagged_per_config(instance2.tag(doc=test_doc), instance_config2)