def build_scenario(build_plan,
                   filter_on='ready',
                   connection='dataiku_workspace',
                   ref_table='referentialclient',
                   ref_project='DIReferential',
                   add_ecrm_context=True,
                   finish_on_client=None,
                   single_client=None):
    """Run the build plan once per client listed in the client referential.

    Parameters
    ----------
    build_plan : table plan forwarded to ``run_scenario`` for each client.
    filter_on : str or list of str; pandas ``query`` clauses selecting the
        clients to build (clauses are joined with ``' & '``).
    connection : name of the SQL connection holding the referential.
    ref_table, ref_project : combined with the project's ``env`` local
        variable into the table name ``<env>_<ref_project>_<ref_table>``.
    add_ecrm_context : forwarded to ``set_client_context``.
    finish_on_client : optional client name to move to the end of the run.
    single_client : optional client name; when found, only that client runs.

    Returns
    -------
    list of client names whose build completed.
    """
    scenario = Scenario()
    # Accept either a single clause or a list of clauses.
    if not isinstance(filter_on, list):
        filter_on = [filter_on]
    project_name = dataiku.default_project_key()
    project_handle = dataiku.api_client().get_project(project_name)
    local_variables = project_handle.get_variables()['local']
    env = local_variables['env']
    kut.display_message('reading client context referential')

    executor = SQLExecutor2(connection=connection)
    sql_query_referential_client = "SELECT * FROM " + '_'.join(
        [env, ref_project, ref_table])
    client_ref = executor.query_to_df(sql_query_referential_client)
    filter_query = ' & '.join(filter_on)
    client_ref = client_ref.query(filter_query) if filter_query else client_ref
    # BUG FIX: concatenating a str with the array returned by unique()
    # does elementwise concat (or raises TypeError depending on dtype);
    # join the names into a single display string instead.
    kut.display_message('Client ready for automation  : ' +
                        ', '.join(client_ref.clientName.unique()))

    kut.display_message('run configuration')
    print(build_plan)

    if not pd.isnull(finish_on_client):
        # Move the requested client to the end of the build order.
        finish_client = client_ref[client_ref.clientName == finish_on_client]
        if len(finish_client) == 0:
            kut.display_message(
                'finish client not found in plan ' + finish_on_client +
                ' is the client name valid ?'
            )
        other_clients = client_ref[client_ref.clientName != finish_on_client]
        client_ref = pd.concat([other_clients, finish_client],
                               ignore_index=True)
    success = []
    if single_client is not None:
        requested_client = client_ref[client_ref.clientName == single_client]
        # BUG FIX: the original tested len(single_client) -- the length of
        # the client NAME string, always truthy for a non-empty name -- so a
        # missing client silently selected an empty frame and built nothing.
        # Test the filtered frame instead.
        if not len(requested_client):
            kut.display_message(
                'requested single client is not found,building all allowed clients'
            )
        else:
            client_ref = requested_client
    for index, client_row in client_ref.iterrows():
        variables = set_client_context(client_row=client_row,
                                       add_ecrm_context=add_ecrm_context,
                                       connection=connection)
        client_name = variables['local']['clientName']
        kut.display_message('starting builds on ' + client_name)

        run_scenario(table_plan=build_plan, scenario=scenario)
        success.append(client_name)
        # Persist progress after each client so a mid-run failure keeps
        # the list of already-built clients.
        scenario.set_global_variables(successfullRun=success)
        print('done_________________' + client_name)
    return success
# Example 2
from dataiku.scenario import Scenario

# Inspect what fired the current scenario run.
current_scenario = Scenario()

# Kind of trigger (time-based, dataset change, manual, ...).
trigger_type = current_scenario.get_trigger_type()

# Display name of the trigger that fired.
trigger_name = current_scenario.get_trigger_name()

# depending on the trigger type, different metadata can be available
trigger_params = current_scenario.get_trigger_params()
# Example 3
# this part can be used in a custom scenario's script, or in a "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

scn = Scenario()

input_name = 'input_partitioned'
target_name = 'output'

# Enumerate every partition of the input dataset.
input_dataset = dataiku.Dataset(input_name)
all_partitions = input_dataset.list_partitions()
print("Dataset %s has %s partitions" % (input_name, len(all_partitions)))

# maybe filter partitions, depending on your usage
partitions_to_build = all_partitions

# Expose the partition list as a scenario variable (comma separated string).
partition_list_value = ','.join(partitions_to_build)
scn.set_scenario_variables(partition_list=partition_list_value)

# in a step-based scenario:
# add a build step to build the output dataset, and set ${partition_list} as the partition identifier

# in a custom scenario:
# launch the build
scn.build_dataset(target_name, partitions='${partition_list}')
# alternatively, in a custom scenario, you can pass the value directly, without using a variable
#scn.build_dataset(target_name, partitions=partition_list_value)
# Example 4
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Updating instance-wide (global) variables requires admin rights.
current_scenario.set_global_variables(var1="val1", var2=3)
# Example 5
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Re-run the global variables update; admin rights are required.
current_scenario.run_global_variables_update()
# Example 6
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Build two partitions in one step, using the partitions spec syntax.
current_scenario.build_dataset("mydataset", partitions="partition1|partition2")
# Example 7
from dataiku.scenario import Scenario
import time

scenario = Scenario()

# Launch two builds in parallel and wait for both.
# BUG FIX: the original passed async=True, which is a SyntaxError on
# Python 3.7+ (`async` is a reserved keyword); the Scenario API parameter
# is spelled `asynchronous` (as used elsewhere in this file).
step1 = scenario.build_dataset("mydataset1", asynchronous=True)
step2 = scenario.build_dataset("mydataset2", asynchronous=True)

while not step1.is_done() or not step2.is_done():
    # do something while waiting
    time.sleep(1)
# Example 8
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Retrain a saved model; its id is visible in the model's URL.
current_scenario.train_model("mymodelid")
# Example 9
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation
from dataiku.scenario import Scenario

# The Scenario object is the main handle from which you initiate steps
current_scenario = Scenario()

# A few example steps follow

# Building a dataset (one specific partition)
current_scenario.build_dataset("customers_prepared", partitions="2015-01-03")

# Controlling the train of a dataset
training = current_scenario.train_model("uSEkldfsm")
new_model = training.get_trained_model()
metrics = new_model.get_new_version_metrics().get_performance_values()
# Promote the freshly trained version only if it is good enough.
if metrics["AUC"] > 0.85:
    new_model.activate_new_version()

# Sending custom reports
mailer = current_scenario.get_message_sender("mail-scenario", "local-mail")  # A messaging channel
mailer.set_params(sender="*****@*****.**", recipient="*****@*****.**")

mailer.send(subject="The scenario is doing well", message="All is good")
# Example 10
###########################################################################################
# !! CUSTOM SCENARIO EXAMPLE !!                                                           #
# See https://doc.dataiku.com/dss/latest/scenarios/custom_scenarios.html for more details #
###########################################################################################

import time
import dataiku
from dataiku.scenario import Scenario, BuildFlowItemsStepDefHelper
from dataikuapi.dss.future import DSSFuture

# Abort the build if it runs longer than this many seconds.
TIMEOUT_SECONDS = 3600

s = Scenario()

# Replace this commented block by your Scenario steps
# Example: build a Dataset
step_handle = s.build_dataset("your_dataset_name", asynchronous=True)

start = time.time()
while not step_handle.is_done():
    end = time.time()
    print("Duration: {}s".format(end - start))
    if end - start > TIMEOUT_SECONDS:
        # Abort the underlying job, then fail the scenario run.
        f = DSSFuture(dataiku.api_client(), step_handle.future_id)
        f.abort()
        raise Exception("Scenario was aborted because it took too much time.")
    # FIX: poll at a reasonable interval -- the original loop busy-waited,
    # printing the duration continuously and burning a full CPU core.
    time.sleep(5)
# Example 11
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Run every check configured on the dataset.
current_scenario.run_dataset_checks("mydataset")
# Example 12
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import uuid
from dataiku.scenario import Scenario

PARTNER = 'IHG'
MODEL_TYPE = 'BG'
proj_handle = dataiku.api_client().get_project("IHG_AUTO_V1_1")
proj_var = proj_handle.get_variables()
# Create the main handle to interact with the scenario
scen = Scenario()
step = scen.get_previous_steps_outputs()


def _step_first_cell(step_name):
    """Return row 0 / column 0 of the SQL result of the named previous step."""
    matching = [d for d in step if d['stepName'] == step_name]
    return matching[0]['result']['rows'][0][0]


# Score #
new_score_date = _step_first_cell('query_date')

# train #
new_train_date = _step_first_cell('train_ref_date')

# valid #
new_valid_date = _step_first_cell('valid_ref_date')

# Previous values, read back from the project variables.
cur_score_date = proj_var["standard"]["scoring_file_date"]
cur_train_date = proj_var["standard"]["train_file_date"]
# Example 13
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our Documentation
from dataiku.scenario import Scenario

# The Scenario object is the main handle from which you initiate steps
current_scenario = Scenario()

# A few example steps follow

# Force-rebuild the scores dataset (with its upstream flow) in another
# project, then the local unseen_scored dataset.
current_scenario.build_dataset("scores",
                               build_mode="RECURSIVE_FORCED_BUILD",
                               project_key="FRAUD_MODEL")

current_scenario.build_dataset("unseen_scored",
                               build_mode="RECURSIVE_FORCED_BUILD")
# Example 14
from dataiku.scenario import Scenario

scenario = Scenario()

# Only SQL-query triggers carry a result payload.
if scenario.get_trigger_type() == 'exec_sql':
    trigger_params = scenario.get_trigger_params()

    # the list of the columns in the query output
    # BUG FIX: the original read ['result']['rows'] here -- the same key as
    # the data rows below. Rows are arrays of strings and have no
    # 'name'/'type' entries; the column metadata lives under 'columns'.
    columns = trigger_params['result']['columns']
    # columns contain name and type
    print("\t".join([column['name'] for column in columns]))
    print("\t".join([column['type'] for column in columns]))

    # the rows in the result, as an array of array of strings
    rows = trigger_params['result']['rows']
    for row in rows:
        print("\t".join(row))
# Example 15
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Build a managed folder; its id is visible in the folder's URL.
current_scenario.build_folder("myfolderid")
# this part can be used in a custom scenario's script, or in a "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

s = Scenario()

#SET DATASET NAMES
# The dataset that gets its partitions added outside of dataiku, or through
# another process should be set in the updated variable
###
# The dataset that the original dataset writes to should be the old variable
updated = 'customers'
old = 'customers_prepared'


# fetch the partitions
def partition_list(dataset):
    """Return the list of partition identifiers of the named dataset."""
    return dataiku.Dataset(dataset).list_partitions()


# Get partitions in new set that aren't in old set
partitions_to_build = list(
    set(partition_list(updated)) - set(partition_list(old)))

# build the variable's value as a comma separated string
partition_list_value = ','.join(partitions_to_build)
s.set_scenario_variables(partition_list=partition_list_value)

# BUG FIX: the original used the Python 2 print STATEMENT, which is a
# SyntaxError under Python 3; call print() instead.
print(partition_list_value)

# in a step-based scenario:
# Example 17 (note: the "Example 16" header is missing from this scrape)
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Run an arbitrary SQL statement on the named connection.
current_scenario.execute_sql("connection", "UPDATE TABLE t SET ...")
# Example 18
# Get scenario handle
from dataiku.scenario import Scenario

scenario = Scenario()

# Create a message sender bound to the "gmail" messaging channel
sender = scenario.get_message_sender(channel_id="gmail")  # A messaging channel

# Define your attachment: a dataset export configuration
# (sampling, partition selection, and output format).
# NOTE(review): this snippet is truncated by the scrape -- the dict literal
# below is never closed (it stops inside "predefinedConfig"), so the code
# as-is is not syntactically complete.
attachment = {
    "destinationType": "DOWNLOAD",
    "destinationDatasetProjectKey": "DKU_CHURN",
    "overwriteDestinationDataset": "false",
    "selection": {
        "samplingMethod": "FULL",
        "partitionSelectionMethod": "ALL",
        "targetRatio": 0.02,
        "maxRecords": 100000,
        "selectedPartitions": [],
        "ordering": {
            "enabled": "false",
            "rules": []
        }
    },
    "advancedMode": "false",
    "exportOption": {
        "id": "excel",
        "label": "Excel (*.xlsx)",
        "canStream": "false",
        "formatType": "excel",
        "predefinedConfig": {
# Example 19
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Set project-level variables (the original comment mentioned GLOBAL
# variables, but this call updates the project's variables).
current_scenario.set_project_variables(var1="val1", var2=3)
# Example 20
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Scenario-scoped variables, usable as ${var1} / ${var2} in later steps.
current_scenario.set_scenario_variables(var1="val1", var2=34)
# Example 21
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Push the dataset's definition to the Hive metastore.
current_scenario.synchronize_hive_metastore("mydataset")
# Example 22
from dataiku.scenario import Scenario

current_scenario = Scenario()

# Simple (non-partitioned) dataset build.
current_scenario.build_dataset("mydataset")