def build_scenario(build_plan,
                   filter_on='ready',
                   connection='dataiku_workspace',
                   ref_table='referentialclient',
                   ref_project='DIReferential',
                   add_ecrm_context=True,
                   finish_on_client=None,
                   single_client=None):
    scenario = Scenario()
    if not isinstance(filter_on, list):
        filter_on = [filter_on]
    project_name = dataiku.default_project_key()
    project_key = dataiku.api_client().get_project(project_name)
    local_variables = project_key.get_variables()['local']
    env = local_variables['env']
    kut.display_message('reading client context referential')

    executor = SQLExecutor2(connection=connection)
    sql_query_referential_client = "SELECT * FROM " + '_'.join(
        [env, ref_project, ref_table])
    client_ref = executor.query_to_df(sql_query_referential_client)
    filter_query = ' & '.join(filter_on)
    client_ref = client_ref.query(filter_query) if filter_query else client_ref
    kut.display_message('Clients ready for automation: ' +
                        ', '.join(client_ref.clientName.unique()))

    kut.display_message('run configuration')
    print(build_plan)

    if not pd.isnull(finish_on_client):
        finish_client = client_ref[client_ref.clientName == finish_on_client]
        if len(finish_client) == 0:
            kut.display_message(
                'finish client not found in plan: ' + finish_on_client +
                ', is the client name valid?'
            )
        other_clients = client_ref[client_ref.clientName != finish_on_client]
        client_ref = pd.concat([other_clients, finish_client],
                               ignore_index=True)
    success = []
    if single_client is not None:
        requested_client = client_ref[client_ref.clientName == single_client]
        if not len(requested_client):
            kut.display_message(
                'requested single client not found, building all allowed clients'
            )
        else:
            client_ref = requested_client
    for index, client_row in client_ref.iterrows():
        variables = set_client_context(client_row=client_row,
                                       add_ecrm_context=add_ecrm_context,
                                       connection=connection)
        client_name = variables['local']['clientName']
        kut.display_message('starting builds on ' + client_name)

        run_scenario(table_plan=build_plan, scenario=scenario)
        success.append(client_name)
        scenario.set_global_variables(successfullRun=success)
        print('done_________________' + client_name)
    return success
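A minimal usage sketch, assuming this runs in a scenario's custom Python step; the build plan entries and client name below are made up:

success = build_scenario(build_plan=['prepared_sales', 'aggregated_sales'],
                         filter_on='ready',
                         single_client='AcmeCorp')  # hypothetical values
print('clients built:', success)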
Example 2
def get_token():

    # Read in the existing conf
    dss = dataiku.api_client()
    project = dss.get_project(dataiku.default_project_key())
    variables = project.get_variables()["standard"]
    conf = variables.get("powerbi-settings", None)

    # Decrypt
    key = request.args.get("api-key")
    pbi = {}
    pbi["username"] = conf["username"]
    pbi["password"] = decrypt_string(conf["password"], key)
    pbi["client_id"] = conf["client_id"]
    pbi["client_secret"] = decrypt_string(conf["client_secret"], key)
    pbi["resource"] = conf["resource"]
    pbi["grant_type"] = conf["grant_type"]
    pbi["scope"] = conf["scope"]

    # Get the token
    response = requests.post(
        'https://login.microsoftonline.com/common/oauth2/token', data=pbi)
    o = {}
    o["token"] = response.json().get("access_token")

    return json.dumps(o)
Example 3
def get_snowflake_datasets():
    project_key = default_project_key()
    project = api_client().get_project(project_key)
    return [
        dataset for dataset in project.list_datasets()
        if dataset.type == 'Snowflake'
    ]
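A short usage sketch for the helper above (assumes default_project_key and api_client are imported from dataiku, and attribute access on the list items mirrors the dataset.type filter in the snippet):

for dataset in get_snowflake_datasets():
    print(dataset.name)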
def get_folder_partition_root(folder, is_input=False):
    """Retrieve the partition root path using a dataiku.Folder.

    Args:
        folder (dataiku.Folder): Input or output folder of the recipe used to retrieve the partition path pattern.
        is_input (bool): True if the folder must be considered as an input, False if it is an output.

    Returns:
        Partition path, or an empty string if the folder is not partitioned.
    """
    folder_id = folder.get_id()
    input_id = folder_id if is_input else None
    dku_flow_variables = dataiku.get_flow_variables()
    client = dataiku.api_client()
    project = client.get_project(dataiku.default_project_key())
    folder = project.get_managed_folder(folder_id)
    folder_config = folder.get_definition()
    partitioning_config = folder_config.get("partitioning")
    if not partitioning_config:
        return ""
    file_path_pattern = partitioning_config.get("filePathPattern", None)
    dimensions, types = get_dimensions(partitioning_config)
    partitions = get_partitions(dku_flow_variables,
                                dimensions,
                                input_id=input_id)
    file_path = complete_file_path_pattern(file_path_pattern, partitions,
                                           dimensions, types)
    file_path = complete_file_path_time_pattern(dku_flow_variables,
                                                file_path,
                                                input_id=input_id)
    return file_path
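A hedged usage sketch inside a partitioned Python recipe; the folder name is hypothetical and the helpers above are assumed to be importable:

import dataiku

output_folder = dataiku.Folder("output_folder")  # hypothetical folder name
partition_root = get_folder_partition_root(output_folder, is_input=False)
# write under the partition root so the file lands in the current partition
output_folder.upload_data(partition_root + "/result.json", b'{"ok": true}')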
Example 5
def save_new_token():

    # Read in the conf and get a token
    conf = json.loads(request.data)
    key = conf["api-key"]
    pbi = {}
    pbi["username"] = conf["powerbi-username"]
    pbi["password"] = conf["powerbi-password"]
    pbi["client_id"] = conf["powerbi-client-id"]
    pbi["client_secret"] = conf["powerbi-client-secret"]
    pbi["resource"] = conf["powerbi-resource"]
    pbi["grant_type"] = conf["powerbi-grant-type"]
    pbi["scope"] = conf["powerbi-scope"]
    response = requests.post(
        'https://login.microsoftonline.com/common/oauth2/token', data=pbi)

    # Save the token
    data = pbi
    data["password"] = encrypt_string(conf["powerbi-password"], key)
    data["client_secret"] = encrypt_string(conf["powerbi-client-secret"], key)
    data["access_token"] = response.json().get("access_token")
    data["created_at"] = str(datetime.datetime.utcnow())
    data["dss_port"] = os.environ["DKU_BACKEND_PORT"]
    data["webapp_project"] = conf["webapp-url"].split("/")[-3]
    data["webapp_id"] = conf["webapp-url"].split("/")[-2]
    data["project_key"] = os.environ["DKU_CURRENT_PROJECT_KEY"]

    set_dss_variables(dataiku.default_project_key(), data)
    # Send back some results
    o = {}
    o["powerbi-access-token"] = data["access_token"]
    return json.dumps(o)
def count_records(dataset: dataiku.Dataset) -> int:
    """
    Count the number of records of a dataset using the Dataiku dataset metrics API
    """
    metric_id = "records:COUNT_RECORDS"
    dataset_name = dataset.name.split(".")[1]
    partitions = dataset.read_partitions
    client = dataiku.api_client()
    project = client.get_project(dataiku.default_project_key())
    logging.info("Counting records of dataset: {}".format(dataset_name))
    if partitions is None or len(partitions) == 0:
        project.get_dataset(dataset_name).compute_metrics(metric_ids=[metric_id])
        metric = dataset.get_last_metric_values()
        record_count = dataiku.ComputedMetrics.get_value_from_data(metric.get_global_data(metric_id=metric_id))
        logging.info("Dataset contains {:d} records and is not partitioned".format(record_count))
    else:
        record_count = 0
        for partition in partitions:
            project.get_dataset(dataset_name).compute_metrics(partition=partition, metric_ids=[metric_id])
            metric = dataset.get_last_metric_values()
            record_count += dataiku.ComputedMetrics.get_value_from_data(
                metric.get_partition_data(partition=partition, metric_id=metric_id)
            )
        logging.info("Dataset contains {:d} records in partition(s) {}".format(record_count, partitions))
    return record_count
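A short usage sketch (the dataset name is made up):

import logging
import dataiku

logging.basicConfig(level=logging.INFO)
transactions = dataiku.Dataset("transactions")  # hypothetical dataset name
print(count_records(transactions))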
Example 7
def get_dataset_flow():
    client = dataiku.api_client()
    project_key = dataiku.default_project_key()
    project = client.get_project(project_key)
    datasets = project.list_datasets()
    dataset_names = [dataset["name"] for dataset in datasets]
    return json.dumps({"dataset_names": dataset_names})
Example 8
def save_data(id,
              payload,
              content_type,
              label=None,
              project_key=None,
              encoding=None):
    """
    Saves data as a DSS static insight that can be exposed on the dashboard

    :param str id: Unique identifier of the insight within the project. If an insight with the same identifier
                   already exists, it will be replaced
    :param payload: bytes-oriented data, or Base64 string
    :param content_type: the MIME type of the data in payload (example: text/html or image/png)
    :param str label: Optional display label for the insight. If None, the id will be used as label
    :param str project_key: Project key in which the insight must be saved. If None, the contextual (current)
                    project is used
    :param str encoding: If the payload was a Base64 string, this must be "base64". Else, this must be None
    """
    if project_key is None:
        project_key = default_project_key()

    backend_void_call(
        "insights/save-static-file-insight", {
            "projectKey": project_key,
            "id": id,
            "payload": _get_payload(payload, encoding),
            "contentType": content_type,
            "label": label
        })
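A hedged usage sketch that publishes a small HTML fragment as a static insight; the insight id, label and payload are made up:

save_data(id="sales-summary",  # hypothetical insight id
          payload=b"<h1>Weekly sales: 42</h1>",
          content_type="text/html",
          label="Sales summary")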
Example 9
def list_datasets():
    project_key = dataiku.default_project_key()
    client = dataiku.api_client()
    project = client.get_project(project_key)
    dataset_list = [{
        "name": dataset_dict['name']
    } for dataset_dict in project.list_datasets()]
    return json.dumps({'dataset_list': dataset_list})
def get_sql_table(referential_name, project_key='DIReferential'):
    env = get_project_variables(scope='local')['env']
    if not project_key or project_key == 'self':
        project_key = dataiku.default_project_key()
    table_name = '_'.join([env, project_key, referential_name.lower()])
    query = 'SELECT * FROM ' + table_name
    executor = SQLExecutor2(connection='dataiku_workspace')
    return executor.query_to_df(query)
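A short usage sketch reusing the referential table name from the first example:

client_ref = get_sql_table('referentialclient')
print(client_ref.clientName.unique())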
Example 11
 def run(self):
     self.get_inputs()
     self.validation()
     self.keras_model = get_keras_model_from_saved_model(
         default_project_key(), self.model)
     self.onnx_model = convert_from_keras_to_onnx(self.keras_model,
                                                  self.batch_size,
                                                  self.float_32)
     self.write_output()
Example 12
def do(payload, config, plugin_config, inputs):
    project_key = dataiku.default_project_key()
    project_managed_folders = dataiku.api_client().get_project(
        project_key).list_managed_folders()

    choices = [{
        'label': '{} ({})'.format(mf['name'], mf['type']),
        'value': mf['id']
    } for mf in project_managed_folders]
    choices.append({'label': 'Create new Filesystem folder...', 'value': 'create_new_folder'})
    return {"choices": choices}
Example 13
def draw_graph():
    #get data
    project_key = dataiku.default_project_key()

    similarity = float(request.args.get('similarity'))
    node_source = request.args.get('node_source')
    node_target = request.args.get('node_target')
    interactions = request.args.get('interactions')
    dataset = request.args.get('dataset')
    name = project_key + '.' + dataset

    print(name)

    df = dataiku.Dataset(name).get_dataframe()

    df = df[df[interactions] > similarity]
    df = df[[node_source, node_target, interactions]]
    df.columns = ['source', 'target', 'weight']

    print "%d rows" % df.shape[0]
    G = nx.Graph()
    G.add_edges_from(zip(df.source, df.target))

    print(nx.info(G))

    # degree
    for node, val in dict(nx.degree(G)).items():
        G.nodes[node]['degree'] = val
    # pagerank
    for node, val in nx.pagerank(G).items():
        G.nodes[node]['pagerank'] = val
    # connected components
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    for component, nodes in enumerate(components):
        for node in nodes:
            G.nodes[node]['cc'] = component
    # community
    partition = best_partition(G)
    for node, cluster in partition.items():
        G.nodes[node]['community'] = cluster

    # convert to JSON
    data = json_graph.node_link_data(G)

    # fix for networkx>=2.0 change of API
    if int(nx.__version__.split(".")[0]) >= 2:
        dict_name_id = {
            data["nodes"][i]["id"]: i
            for i in range(len(data["nodes"]))
        }
        for link in data["links"]:
            link["source"] = dict_name_id[link["source"]]
            link["target"] = dict_name_id[link["target"]]

    return json.dumps({"status": "ok", "graph": data})
Example 14
def do(payload, config, plugin_config, inputs):
    if payload["funtastic"] == "engines":
        client = dataiku.api_client()
        project = client.get_project(dataiku.default_project_key())
        engines = list(
            project.get_settings().get_raw()['metrics']['engineConfig'].keys())
        return {'engines': engines}

    if payload["funtastic"] == "connections":
        client = dataiku.api_client()
        connections = list(client.list_connections().keys())
        return {'connections': connections}
def add_project_variable(variable,
                         key,
                         scope='local',
                         list_shaped=True,
                         project_key=None,
                         unique=False):
    project_key = project_key or dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_key)
    variables = project.get_variables()
    value = (kut.unique_values_as_string(array=variable, list_shaped=list_shaped, unique=unique)
             if isinstance(variable, list) else variable)
    variables[scope][key] = value
    project.set_variables(variables)
    return variables
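A minimal usage sketch; the key and values are made up, and kut.unique_values_as_string is the project helper referenced above:

add_project_variable(['clientA', 'clientB'], key='allowedClients', scope='local')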
def add_ECRM_context(variables, connection='dataiku_workspace'):
    print('adding ecrm context')
    project_name = dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_name)
    local_variables = project.get_variables()['local']
    env = local_variables['env']
    executor = SQLExecutor2(connection=connection)
    sql_query_client_ecrm = "SELECT * FROM " + env + "_DIReferential_referentialECRMOperation"
    client_ecrm = executor.query_to_df(sql_query_client_ecrm)
    ecrm_info = client_ecrm[client_ecrm.clientName ==
                            variables['local']['clientName']]
    print('found', len(ecrm_info), 'relevant entries')
    variables['local']['ecrmOperations'] = {}
    for i, operation_row in ecrm_info.iterrows():
        operation_dict = operation_row.to_dict()
        operation_type = operation_dict['operationType']
        del operation_dict['operationType']
        variables['local']['ecrmOperations'][operation_type] = operation_dict
    return variables
Example 17
def get_existing_credentials():

    # Read in the existing conf
    dss = dataiku.api_client()
    project = dss.get_project(dataiku.default_project_key())
    variables = project.get_variables()["standard"]
    conf = variables.get("powerbi-settings", None)

    # Decrypt
    key = request.args.get("api-key")
    pbi = {}
    pbi["powerbi-username"] = conf["username"]
    pbi["powerbi-password"] = decrypt_string(conf["password"], key)
    pbi["powerbi-client-id"] = conf["client_id"]
    pbi["powerbi-client-secret"] = decrypt_string(conf["client_secret"], key)
    pbi["powerbi-resource"] = conf["resource"]
    pbi["powerbi-grant-type"] = conf["grant_type"]
    pbi["powerbi-scope"] = conf["scope"]

    # Send back some results
    return json.dumps(pbi)
def set_client_context(client_row,
                       project_key=None,
                       add_ecrm_context=True,
                       connection='dataiku_workspace'):
    kut.display_message('setting context', secondary=True)
    if not project_key:
        project_name = dataiku.default_project_key()
        project_key = dataiku.api_client().get_project(project_name)
        print('inferring project key:', project_key)
    new_vars = serialize_variables(new_vars=client_row.to_dict(),
                                   project=project_key,
                                   context='local')
    if add_ecrm_context:
        new_vars = add_ECRM_context(new_vars, connection=connection)
    project_key.set_variables(new_vars)
    variables = project_key.get_variables()
    local_variables = project_key.get_variables()['local']
    client_name = local_variables['clientName']
    print('client name:', client_name)
    print(local_variables)
    return variables
Example 19
 def get_default_project(self):
     """
     Get a handle to the current default project, if available (i.e. if dataiku.default_project_key() is valid)
     """
     import dataiku
     return DSSProject(self, dataiku.default_project_key())
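A brief usage sketch from a DSS Python recipe or notebook:

import dataiku

client = dataiku.api_client()
project = client.get_default_project()
print(project.project_key)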
Example 20
        )
        sys.exit("AWS S3 Credential error")
    if input_connection["encryptionMode"] != "NONE":
        print(
            "[-] Found the connection {} but it is configured to use encryption which is not currently supported."
            .format(connection_name))
        sys.exit("AWS S3 Credential error")

    AWS_ACCESS_KEY = input_connection["accessKey"]
    AWS_SECRET_KEY = input_connection["secretKey"]
elif USE_PROJECT_VARIABLES:
    print(
        "[+] Use S3 credentials defined as Local, Project, or Global Variables. First, looking in Local Variables..."
    )
    dss = dataiku.api_client()
    project = dss.get_project(dataiku.default_project_key())
    variables = project.get_variables()
    if "snowflake" in variables["local"]:
        if "aws_access_key" in variables["local"][
                "snowflake"] and "aws_secret_key" in variables["local"][
                    "snowflake"]:
            print("[+] Found AWS credentials in Local Variables")
            AWS_ACCESS_KEY = variables["local"]["snowflake"]["aws_access_key"]
            AWS_SECRET_KEY = variables["local"]["snowflake"]["aws_secret_key"]
        else:
            print(
                "[-] 'snowflake' key found in Local Variables but could not retrieve aws_access_key and/or aws_secret_key."
            )
            print("[-] Please check and correct your Local Variables.")
            sys.exit("Local Variables error")
    elif "snowflake" in variables["standard"]:
Example 21
from flask import request
from distutils.util import strtobool
import json
import traceback
import dataiku
from dataiku.customwebapp import get_webapp_config

from design_experiment.sample_size import min_sample_size, z_value
from helpers import save_parameters
from constants import Parameters
from dku_tools import get_output_folder

config_settings = get_webapp_config()
project_key = dataiku.default_project_key()
client = dataiku.api_client()


@app.route('/sample_size', methods=['POST'])
def get_sample_size():
    try:
        config = json.loads(request.data)
        baseline_conversion_rate = float(config.get(Parameters.BCR.value))/100
        minimum_detectable_effect = float(config.get(Parameters.MDE.value))/100
        alpha = 1-float(config.get(Parameters.SIG_LEVEL.value))/100
        power = float(config.get(Parameters.POWER.value))/100
        ratio = float(config.get(Parameters.RATIO.value))/100
        reach = float(config.get(Parameters.REACH.value))/100
        two_tailed = strtobool(config.get(Parameters.TAIL.value))
        sample_size_A, sample_size_B = min_sample_size(baseline_conversion_rate, minimum_detectable_effect, alpha, power, ratio, two_tailed)
        sample_size_A = round(sample_size_A / reach)
        sample_size_B = round(sample_size_B / reach)
Example 22
def is_dataset_valid(dataset_name):
    project_key = default_project_key()
    project = api_client().get_project(project_key)
    dss_dataset = project.get_dataset(dataset_name)
    return dss_dataset.get_settings().type == 'Snowflake'
import dataiku

INPUT_DATASET = "mydataset"
COLUMN_TO_PARTITION_BY = "mypartitioningcolumn"

dataset = dataiku.Dataset(INPUT_DATASET)
df = dataset.get_dataframe(columns=[COLUMN_TO_PARTITION_BY])

combinations = df[COLUMN_TO_PARTITION_BY].unique()
combinations_str = "/".join(combinations)

client = dataiku.api_client()
project = client.get_project(dataiku.default_project_key())
variables = project.get_variables()
variables["standard"]["myPartitionList"] = combinations_str
project.set_variables(variables)
def get_project_variables(scope=None, project_key=None):
    project_key = project_key or dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_key)
    return project.get_variables()[scope] if scope else project.get_variables()