# Build selected partitions of a dataset from a scenario script.
from dataiku.scenario import Scenario

# Handle from which scenario steps are launched.
scenario = Scenario()

# The partitions to build are passed as one string written in the
# partitions spec syntax.
# NOTE(review): confirm that "|" is the separator intended for this dataset's
# partitioning scheme before reusing this value.
partition_spec = "partition1|partition2"
scenario.build_dataset("mydataset", partitions=partition_spec)
# This snippet can be used in a custom scenario's script, or in an
# "Execute python" step of a step-based scenario.
import dataiku
from dataiku.scenario import Scenario

# Dataset whose partitions are listed, and dataset that gets built.
dataset_name = 'input_partitioned'
output_name = 'output'

s = Scenario()

# Fetch the partitions of the input dataset.
ds = dataiku.Dataset(dataset_name)
all_partitions = ds.list_partitions()
partition_count = len(all_partitions)
print("Dataset %s has %s partitions" % (dataset_name, partition_count))

# Filter the partitions here if only some of them should be built.
partitions_to_build = all_partitions

# Store the selection in a scenario variable, as a comma-separated string.
partition_list_value = ','.join(partitions_to_build)
s.set_scenario_variables(partition_list=partition_list_value)

# In a step-based scenario: add a build step for the output dataset and set
# ${partition_list} as its partition identifier.
# In a custom scenario: launch the build from here.
s.build_dataset(output_name, partitions='${partition_list}')

# Alternatively, in a custom scenario, the value can be passed directly,
# without going through a variable:
# s.build_dataset(output_name, partitions=partition_list_value)
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation.
from dataiku.scenario import Scenario

# Minimum AUC the freshly trained version must exceed to be activated.
MIN_AUC = 0.85

# The Scenario object is the main handle from which you initiate steps.
scenario = Scenario()

# A few example steps follow.

# Build one partition of a dataset.
scenario.build_dataset("customers_prepared", partitions="2015-01-03")

# Train a model, then activate the new version only when its AUC is above
# the threshold.
training_result = scenario.train_model("uSEkldfsm")
trained_model = training_result.get_trained_model()
new_version_metrics = trained_model.get_new_version_metrics()
performance = new_version_metrics.get_performance_values()
if performance["AUC"] > MIN_AUC:
    trained_model.activate_new_version()

# Send a custom report through a messaging channel.
sender = scenario.get_message_sender("mail-scenario", "local-mail")
sender.set_params(sender="*****@*****.**", recipient="*****@*****.**")
sender.send(subject="The scenario is doing well", message="All is good")
###########################################################################################
# !! CUSTOM SCENARIO EXAMPLE !!                                                           #
# See https://doc.dataiku.com/dss/latest/scenarios/custom_scenarios.html for more details #
###########################################################################################

import time

import dataiku
from dataiku.scenario import Scenario, BuildFlowItemsStepDefHelper
from dataikuapi.dss.future import DSSFuture

# Maximum time the build is allowed to run before it is aborted.
TIMEOUT_SECONDS = 3600
# Pause between two status checks, so the loop neither spins nor floods the
# scenario log with "Duration" lines.
POLL_INTERVAL_SECONDS = 5

s = Scenario()

# Replace this commented block by your Scenario steps
# Example: build a Dataset
# The step is started asynchronously so that its duration can be watched below.
step_handle = s.build_dataset("your_dataset_name", asynchronous=True)

start = time.time()
while not step_handle.is_done():
    elapsed = time.time() - start
    print("Duration: {}s".format(elapsed))
    if elapsed > TIMEOUT_SECONDS:
        # Request the abort of the step's future, then fail the scenario script.
        f = DSSFuture(dataiku.api_client(), step_handle.future_id)
        f.abort()
        raise Exception("Scenario was aborted because it took too much time.")
    # Wait before polling again instead of busy-looping.
    time.sleep(POLL_INTERVAL_SECONDS)
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation.
from dataiku.scenario import Scenario

# The Scenario object is the main handle from which you initiate steps.
scenario = Scenario()

# Both builds below use the same build mode.
forced_recursive_mode = "RECURSIVE_FORCED_BUILD"

# Build "scores", passing an explicit project key.
scenario.build_dataset(
    "scores",
    build_mode=forced_recursive_mode,
    project_key="FRAUD_MODEL",
)

# Build "unseen_scored" without specifying a project key.
scenario.build_dataset("unseen_scored", build_mode=forced_recursive_mode)
# Minimal example: build a dataset from a scenario script.
from dataiku.scenario import Scenario

# Handle from which scenario steps are launched.
scenario = Scenario()

# Build the dataset with the default options of build_dataset.
target_dataset = "mydataset"
scenario.build_dataset(target_dataset)