import dataiku
import pandas as pd
from dataiku.core.sql import SQLExecutor2
from dataiku.scenario import Scenario
# kut, set_client_context and run_scenario are project-specific helpers assumed
# to be available from this project's library


def build_scenario(build_plan,
                   filter_on='ready',
                   connection='dataiku_workspace',
                   ref_table='referentialclient',
                   ref_project='DIReferential',
                   add_ecrm_context=True,
                   finish_on_client=None,
                   single_client=None):
    scenario = Scenario()
    if not isinstance(filter_on, list):
        filter_on = [filter_on]

    project_name = dataiku.default_project_key()
    project_handle = dataiku.api_client().get_project(project_name)
    local_variables = project_handle.get_variables()['local']
    env = local_variables['env']

    kut.display_message('reading client context referential')
    executor = SQLExecutor2(connection=connection)
    sql_query_referential_client = "SELECT * FROM " + '_'.join([env, ref_project, ref_table])
    client_ref = executor.query_to_df(sql_query_referential_client)
    filter_query = ' & '.join(filter_on)
    client_ref = client_ref.query(filter_query) if filter_query else client_ref
    kut.display_message('Clients ready for automation: ' + ', '.join(client_ref.clientName.unique()))

    kut.display_message('run configuration')
    print(build_plan)

    if not pd.isnull(finish_on_client):
        finish_client = client_ref[client_ref.clientName == finish_on_client]
        if len(finish_client) == 0:
            kut.display_message('finish client not found in plan: ' + finish_on_client + ' - is the client name valid?')
        # move the finish client to the end of the build order
        other_clients = client_ref[client_ref.clientName != finish_on_client]
        client_ref = pd.concat([other_clients, finish_client], ignore_index=True)

    success = []
    if single_client is not None:
        requested_client = client_ref[client_ref.clientName == single_client]
        if not len(requested_client):
            kut.display_message('requested single client was not found, building all allowed clients')
        else:
            client_ref = requested_client

    for index, client_row in client_ref.iterrows():
        variables = set_client_context(client_row=client_row,
                                       add_ecrm_context=add_ecrm_context,
                                       connection=connection)
        client_name = variables['local']['clientName']
        kut.display_message('starting builds on ' + client_name)
        run_scenario(table_plan=build_plan, scenario=scenario)
        success.append(client_name)
        scenario.set_global_variables(successfullRun=success)
        print('done_________________' + client_name)
    return success
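# Usage sketch (hypothetical): 'my_build_plan' is a placeholder whose structure must
# match whatever run_scenario expects, and the client name below is illustrative.
#
# built = build_scenario(build_plan=my_build_plan,
#                        filter_on=['ready'],
#                        single_client='ACME')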
from dataiku.scenario import Scenario

scenario = Scenario()

trigger_type = scenario.get_trigger_type()
trigger_name = scenario.get_trigger_name()

# depending on the trigger type, different metadata can be available
trigger_params = scenario.get_trigger_params()
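# A minimal sketch of branching on the trigger type: a scenario triggered by a SQL
# query change ('exec_sql') exposes the query result in its trigger params, as shown
# in the exec_sql example further down; other trigger types carry different metadata.
if trigger_type == 'exec_sql':
    print(trigger_params['result'])
else:
    print("Triggered by %s (%s)" % (trigger_name, trigger_type))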
# this part can be used in a custom scenario's script, or in an "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

s = Scenario()

dataset_name = 'input_partitioned'
output_name = 'output'

# fetch the partitions
ds = dataiku.Dataset(dataset_name)
all_partitions = ds.list_partitions()
print("Dataset %s has %s partitions" % (dataset_name, len(all_partitions)))

# maybe filter partitions, depending on your usage
partitions_to_build = all_partitions

# build the variable's value as a comma-separated string
partition_list_value = ','.join(partitions_to_build)
s.set_scenario_variables(partition_list=partition_list_value)

# in a step-based scenario:
# add a build step to build the output dataset, and set ${partition_list} as the partition identifier

# in a custom scenario:
# launch the build
s.build_dataset(output_name, partitions='${partition_list}')

# alternatively, in a custom scenario, you can pass the value directly, without using a variable
#s.build_dataset(output_name, partitions=partition_list_value)
from dataiku.scenario import Scenario

scenario = Scenario()

# Note that you must be admin to update global variables
scenario.set_global_variables(var1="val1", var2=3)
from dataiku.scenario import Scenario

scenario = Scenario()

# Note that you must be admin to update global variables
scenario.run_global_variables_update()
from dataiku.scenario import Scenario

scenario = Scenario()

# Partitions are specified using the partitions spec syntax
scenario.build_dataset("mydataset", partitions="partition1|partition2")
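# A sketch of the partition spec syntax, assuming a dataset partitioned by date and
# country (hypothetical dimension names): '|' joins the values of the different
# dimensions of one partition, and ',' separates several partitions.
scenario.build_dataset("mydataset", partitions="2015-01-03|France,2015-01-04|France")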
from dataiku.scenario import Scenario
import time

scenario = Scenario()

# 'async' is a reserved keyword in Python 3, so the non-blocking flag is 'asynchronous'
step1 = scenario.build_dataset("mydataset1", asynchronous=True)
step2 = scenario.build_dataset("mydataset2", asynchronous=True)

while not step1.is_done() or not step2.is_done():
    # do something while waiting
    time.sleep(1)
from dataiku.scenario import Scenario

scenario = Scenario()

# The id of the model is visible in the URL of the model
scenario.train_model("mymodelid")
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation

from dataiku.scenario import Scenario

# The Scenario object is the main handle from which you initiate steps
scenario = Scenario()

# A few example steps follow

# Building a dataset
scenario.build_dataset("customers_prepared", partitions="2015-01-03")

# Controlling the training of a model
train_ret = scenario.train_model("uSEkldfsm")
trained_model = train_ret.get_trained_model()

performance = trained_model.get_new_version_metrics().get_performance_values()
if performance["AUC"] > 0.85:
    trained_model.activate_new_version()

# Sending custom reports
sender = scenario.get_message_sender("mail-scenario", "local-mail")  # A messaging channel
sender.set_params(sender="*****@*****.**", recipient="*****@*****.**")
sender.send(subject="The scenario is doing well", message="All is good")
###########################################################################################
# !! CUSTOM SCENARIO EXAMPLE !!                                                           #
# See https://doc.dataiku.com/dss/latest/scenarios/custom_scenarios.html for more details #
###########################################################################################

import time

import dataiku
from dataiku.scenario import Scenario, BuildFlowItemsStepDefHelper
from dataikuapi.dss.future import DSSFuture

TIMEOUT_SECONDS = 3600

s = Scenario()

# Replace this commented block by your Scenario steps
# Example: build a Dataset
step_handle = s.build_dataset("your_dataset_name", asynchronous=True)

start = time.time()
while not step_handle.is_done():
    end = time.time()
    print("Duration: {}s".format(end - start))
    if end - start > TIMEOUT_SECONDS:
        f = DSSFuture(dataiku.api_client(), step_handle.future_id)
        f.abort()
        raise Exception("Scenario was aborted because it took too much time.")
from dataiku.scenario import Scenario

scenario = Scenario()

scenario.run_dataset_checks("mydataset")
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import uuid
from dataiku.scenario import Scenario

PARTNER = 'IHG'
MODEL_TYPE = 'BG'

proj_handle = dataiku.api_client().get_project("IHG_AUTO_V1_1")
proj_var = proj_handle.get_variables()

# Create the main handle to interact with the scenario
scen = Scenario()
step = scen.get_previous_steps_outputs()

# Score #
result_date = ([d for d in step if d['stepName'] == 'query_date'])[0]['result']
new_score_date = result_date['rows'][0][0]

# train #
result_train_ref_date = ([d for d in step if d['stepName'] == 'train_ref_date'])[0]['result']
new_train_date = result_train_ref_date['rows'][0][0]

# valid #
result_valid_ref_date = ([d for d in step if d['stepName'] == 'valid_ref_date'])[0]['result']
new_valid_date = result_valid_ref_date['rows'][0][0]

# previous values
cur_score_date = proj_var["standard"]["scoring_file_date"]
cur_train_date = proj_var["standard"]["train_file_date"]
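# The snippet above stops after reading the current values. A minimal sketch of a
# plausible next step (an assumption, not part of the original): compare the dates
# and persist the new ones back into the project variables.
if new_score_date != cur_score_date or new_train_date != cur_train_date:
    proj_var["standard"]["scoring_file_date"] = new_score_date
    proj_var["standard"]["train_file_date"] = new_train_date
    proj_handle.set_variables(proj_var)  # writes the whole variables dict back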
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation

from dataiku.scenario import Scenario

# The Scenario object is the main handle from which you initiate steps
scenario = Scenario()

# A few example steps follow

# Building a dataset
scenario.build_dataset("scores", build_mode="RECURSIVE_FORCED_BUILD", project_key="FRAUD_MODEL")
scenario.build_dataset("unseen_scored", build_mode="RECURSIVE_FORCED_BUILD")
from dataiku.scenario import Scenario

scenario = Scenario()

if scenario.get_trigger_type() == 'exec_sql':
    trigger_params = scenario.get_trigger_params()

    # the list of the columns in the query output
    columns = trigger_params['result']['columns']
    # each column has a name and a type
    print("\t".join([column['name'] for column in columns]))
    print("\t".join([column['type'] for column in columns]))

    # the rows in the result, as an array of arrays of strings
    rows = trigger_params['result']['rows']
    for row in rows:
        print("\t".join(row))
from dataiku.scenario import Scenario

scenario = Scenario()

# The id of the folder is visible in the URL of the managed folder
scenario.build_folder("myfolderid")
# this part can be used in a custom scenario's script, or in an "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

s = Scenario()

# SET DATASET NAMES
# The dataset whose partitions are added outside of dataiku, or through
# another process, should be set in the 'updated' variable.
# The dataset that the original dataset writes to should be the 'old' variable.
updated = 'customers'
old = 'customers_prepared'

# fetch the partitions
def partition_list(dataset):
    return dataiku.Dataset(dataset).list_partitions()

# Get partitions in the new set that aren't in the old set
partitions_to_build = list(set(partition_list(updated)) - set(partition_list(old)))

# build the variable's value as a comma-separated string
partition_list_value = ','.join(partitions_to_build)
s.set_scenario_variables(partition_list=partition_list_value)
print(partition_list_value)

# in a step-based scenario:
# add a build step to build the output dataset, and set ${partition_list} as the partition identifier
from dataiku.scenario import Scenario

scenario = Scenario()

scenario.execute_sql("connection", "UPDATE t SET ...")
# Get the scenario handle
from dataiku.scenario import Scenario

scenario = Scenario()

# Create a message sender
sender = scenario.get_message_sender(channel_id="gmail")  # A messaging channel

# Define your attachment
attachment = {
    "destinationType": "DOWNLOAD",
    "destinationDatasetProjectKey": "DKU_CHURN",
    "overwriteDestinationDataset": "false",
    "selection": {
        "samplingMethod": "FULL",
        "partitionSelectionMethod": "ALL",
        "targetRatio": 0.02,
        "maxRecords": 100000,
        "selectedPartitions": [],
        "ordering": {
            "enabled": "false",
            "rules": []
        }
    },
    "advancedMode": "false",
    "exportOption": {
        "id": "excel",
        "label": "Excel (*.xlsx)",
        "canStream": "false",
        "formatType": "excel",
        "predefinedConfig": {
from dataiku.scenario import Scenario

scenario = Scenario()

# Updates the variables of the current project
scenario.set_project_variables(var1="val1", var2=3)
from dataiku.scenario import Scenario

scenario = Scenario()

scenario.set_scenario_variables(var1="val1", var2=34)
from dataiku.scenario import Scenario

scenario = Scenario()

scenario.synchronize_hive_metastore("mydataset")
from dataiku.scenario import Scenario

scenario = Scenario()

scenario.build_dataset("mydataset")