Beispiel #1
0
 def set_scenario_variables(self, **kwargs):
     """
     Set extra variables on this scenario run.

     Each keyword argument becomes one variable; the whole mapping is sent
     to the backend JSON-encoded under the "variables" key.
     """
     payload = {"variables": json.dumps(kwargs)}
     backend_json_call("scenarios/set-variables/",
                       payload,
                       err_msg="Failed to set scenario variables")
Beispiel #2
0
 def set_percentage(self, percentage, no_fail=True):
     """
     Send a progress percentage to the backend for the future ``self.future_id``.

     :param percentage: value sent as "percentage" to the backend
     :param no_fail: when True (the default), any error is swallowed and reported
                     through the return value; when False the error is re-raised
     :returns: True if the backend call succeeded, False if it failed and no_fail is set
     """
     try:
         backend_json_call("futures/posttrain-set-percentage", data={
             "futureId": self.future_id,
             "percentage": percentage
         })
     except Exception:
         if no_fail:
             # deliberate best-effort mode: the failure is only signalled by the return value
             return False
         # bare raise re-raises the active exception with its original traceback
         raise
     return True
Beispiel #3
0
    def wait_for_completion(self, step_future=None):
        """
        Awaits the termination of the step and returns its result.

        :param step_future: optionally, an already fetched future state (a dict with a 'hasResult'
                            key, and a 'result' key once finished); no polling is done if it
                            already has a result
        :returns: the value of ``self.get_result()`` once the step has a result
        :raises Exception: if the step was not started, or if ``self.fail_fatal`` is set and the
                           step outcome is "ABORTED"
        :raises StepFailedException: if ``self.fail_fatal`` is set and the step outcome is "FAILED"
        """
        if self.future_id is None:
            raise Exception("Step not started")
        if step_future is None:
            step_future = {'hasResult': False}

        # the loop can only exit once 'hasResult' is truthy, so no extra check is needed after it
        while not step_future['hasResult']:
            # sleep a lot, this is expected to be long running tasks
            time.sleep(5)
            step_future = backend_json_call("futures/get-update",
                                            data={"futureId": self.future_id})

        self.result = step_future['result']

        ret = self.get_result()

        if self.fail_fatal:
            outcome = ret.get_outcome()
            if outcome == "ABORTED":
                raise Exception("Scenario step was aborted")
            elif outcome == "FAILED":
                raise StepFailedException("Scenario step failed: %s" %
                                          ret.get_error_message())

        return ret
Beispiel #4
0
    def list(project_key=None):
        """Return the names of the datasets of a project.

        When project_key is None (or otherwise falsy), the current project
        key is used instead."""
        if not project_key:
            project_key = default_project_key()

        request = {"projectKey": project_key}
        return intercom.backend_json_call("datasets/list", data=request)
Beispiel #5
0
def get_cluster_from_dss_cluster(dss_cluster_id):
    """
    Load a DSS cluster and build the cluster helper from its resolved settings.

    :param dss_cluster_id: identifier of the cluster; it is matched against the 'name' field of
                           the entries returned by the API client's list_clusters()
    :returns: a tuple (cluster_data, clusters, dss_cluster_settings, dss_cluster_config) where
              cluster_data is the plugin data of the cluster settings, clusters is the value
              returned by get_cluster_from_connection_info(), and dss_cluster_config is the
              config as resolved by the backend
    :raises Exception: if no listed cluster has that name
    """
    # public API client
    api_client = dataiku.api_client()

    # make sure the cluster is known to DSS before fetching it
    known_names = [entry['name'] for entry in api_client.list_clusters()]
    if dss_cluster_id not in known_names:
        raise Exception("DSS cluster %s doesn't exist" % dss_cluster_id)
    dss_cluster = api_client.get_cluster(dss_cluster_id)

    # read the settings stored on the cluster
    dss_cluster_settings = dss_cluster.get_settings()
    raw_config = dss_cluster_settings.get_raw()['params']['config']
    # the raw config still carries the preset setup, so have the backend resolve it
    resolve_request = {
        'elementConfig': json.dumps(raw_config),
        'elementType': dss_cluster_settings.get_raw()['type']
    }
    dss_cluster_config = backend_json_call('plugins/get-resolved-settings',
                                           data=resolve_request)
    logging.info("Resolved cluster config : %s" %
                 json.dumps(dss_cluster_config))

    # build the helper class from the cluster settings (the macro doesn't have the params)
    element_connection_info = dss_cluster_config['config']['connectionInfo']
    plugin_connection_info = dss_cluster_config['pluginConfig']['connectionInfo']
    clusters = get_cluster_from_connection_info(element_connection_info,
                                                plugin_connection_info)

    cluster_data = dss_cluster_settings.get_plugin_data()

    return cluster_data, clusters, dss_cluster_settings, dss_cluster_config
Beispiel #6
0
 def get_last_check_values(self):
     """
     Fetch the last values of the checks on this project and wrap them in a :class:`dataiku.ComputedChecks` object
     """
     request = {"projectKey": self.project_key}
     last_values = backend_json_call("checks/projects/get-last-values",
                                     data=request)
     return metrics.ComputedChecks(last_values)
Beispiel #7
0
    def get_previous_steps_outputs(self):
        """
        Fetch the outputs of the steps that already ran in this scenario run.

        As an illustration, if a SQL step named 'the_sql' ran earlier in the scenario, the returned
        list will contain an entry like::

            [
                ...
                {
                    'stepName': 'the_sql',
                    'result': {
                        'success': True,
                        'hasResultset': True,
                        'columns': [ {'type': 'int8', 'name': 'a'}, {'type': 'varchar', 'name': 'b'} ],
                        'totalRows': 2,
                        'rows': [
                                    ['1000', 'min'],
                                    ['2500', 'max']
                                ],
                        'log': '',
                        'endedOn': 0,
                        'totalRowsClipped': False
                    }
                },
                ...
            ]

        Important note: the exact structure of each type of step run output is not precisely defined,
        and may vary from one DSS release to another
        """
        step_outputs = backend_json_call("scenarios/get-step-outputs/",
                                         err_msg="Failed to read step outputs")
        return step_outputs
Beispiel #8
0
    def get_info(self):
        """
        Return the "info" entry of the backend's "savedmodels/get-info" answer for this object.

        The value is fetched on first use and cached in ``self.info``.
        """
        if self.info is not None:
            return self.info

        request = {"projectKey": self.project_key, "lookup": self.short_name}
        response = backend_json_call("savedmodels/get-info", data=request)
        self.info = response["info"]
        return self.info
Beispiel #9
0
 def list_models(project_key=None):
     """
     Fetch the list of saved models of a project

     :param project_key: key of the project from which to list models; when None (or otherwise
                         falsy), the value of default_project_key() is used
     """
     if not project_key:
         project_key = default_project_key()
     request = {"projectKey": project_key}
     return backend_json_call("savedmodels/list", data=request)
Beispiel #10
0
 def get_last_metric_values(self, partition=''):
     """
     Fetch the last values of the metrics on this folder and wrap them in a :class:`dataiku.ComputedMetrics` object

     :param partition: accepted, but not forwarded to the backend by this method
     """
     request = {
         "projectKey": self.project_key,
         "folderId": self.get_id(),
     }
     last_values = backend_json_call("metrics/managed-folders/get-last-values",
                                     data=request)
     return metrics.ComputedMetrics(last_values)
Beispiel #11
0
 def get_last_metric_values(self, partition=''):
     """
     Fetch the last values of the metrics on this dataset and wrap them in a :class:`dataiku.ComputedMetrics` object

     :param partition: sent to the backend as "partition" (empty string by default)
     """
     request = {
         "projectKey": self.project_key,
         "datasetName": self.short_name,
         "partition": partition
     }
     last_values = intercom.backend_json_call("metrics/datasets/get-last-values",
                                              data=request)
     return metrics.ComputedMetrics(last_values)
Beispiel #12
0
    def get_definition(self):
        """
        Return the backend's "savedmodels/get" answer for this object.

        The value is fetched on first use and cached in ``self.definition``.
        """
        if self.definition is not None:
            return self.definition

        request = {
            "projectKey": self.project_key,
            "savedModelId": self.get_id()
        }
        self.definition = backend_json_call("savedmodels/get", data=request)
        return self.definition
Beispiel #13
0
 def get_state():
     """
     Poll the backend for the remote kernel and return its "state" entry (None if absent).

     Takes no argument: ``self`` is not a parameter here, so it has to come from an enclosing scope.
     """
     logging.info("poll state")
     poll_request = {
         "contextProjectKey": remoterun.get_env_var("DKU_CURRENT_PROJECT_KEY"),
         "batchId": self.batch_id
     }
     remote_kernel = backend_json_call("jupyter/poll-remote-kernel",
                                       data=poll_request)
     logging.info("Polled, got : %s" % json.dumps(remote_kernel))
     state = remote_kernel.get("state", None)
     return state
Beispiel #14
0
    def send(self, additional_variables={}, **kwargs):
        """
        Send a message through the backend, using this object's type and channel parameters.

        :param additional_variables: variables sent JSON-encoded as "variables"; the default dict is
                                     shared between calls but is only serialized here, never mutated
        :param kwargs: overrides applied on top of a copy of ``self.channel_params``
        :returns: the result of the backend call
        """
        # work on a copy so that the overrides do not leak into self.channel_params
        configuration = self.channel_params.copy()
        configuration.update(kwargs)

        messaging = {
            "type" : self.type,
            "configuration" : configuration,
        }
        payload = {
            "messaging" : json.dumps(messaging),
            "variables" : json.dumps(additional_variables)
        }
        return backend_json_call("scenarios/send-message", payload)
Beispiel #15
0
    def get_dataset_last_build(self, dataset_name, project_key=None):
        """
        Fetch information about the last build of a dataset. The returned map holds:
        - projectKey and id : the identifier of the dataset
        - jobProjectKey and jobId : the identifier of the job that built the dataset
        - buildEndTime : timestamp of when the build finished
        - buildSuccess : final status of the build

        :param dataset_name: sent to the backend as "objectId"
        :param project_key: sent as "projectKey" only when it is not None
        """
        request = dict(objectId=dataset_name)
        if project_key is not None:
            request.update(projectKey=project_key)

        return backend_json_call("scenarios/get-object-last-build", request)
Beispiel #16
0
    def save_external_check_values(self, values_dict):
        """
        Store check values on this project. The checks are saved with the type "external"

        :param values_dict: the values to save, as a dict (sent JSON-encoded). The keys of the dict
                            are used as check names
        :returns: the result of the backend call
        """
        payload = {
            "projectKey": self.project_key,
            "data": json.dumps(values_dict)
        }
        return backend_json_call("checks/projects/save-external-values",
                                 data=payload,
                                 err_msg="Failed to save external check values")
Beispiel #17
0
    def list_versions(self):
        """
        Return the versions this saved model contains.

        The list is fetched on first use and cached in ``self.versions``.
        """
        if self.versions is not None:
            return self.versions

        request = {
            "projectKey": self.project_key,
            "savedModelId": self.get_id()
        }
        self.versions = backend_json_call("savedmodels/list-versions",
                                          data=request)
        return self.versions
Beispiel #18
0
 def get_check_history(self, check_lookup):
     """
     Fetch every value a given check took on this project

     :param check_lookup: check name or unique identifier; a value that is not a string is sent
                          JSON-encoded
     :returns: the result of the backend call
     """
     request = {"projectKey": self.project_key}
     # NOTE(review): `unicode` is only a builtin on Python 2 - presumably aliased elsewhere in the
     # module for Python 3; confirm. It is only evaluated when the lookup is not a `str`.
     if isinstance(check_lookup, str) or isinstance(check_lookup, unicode):
         request["checkLookup"] = check_lookup
     else:
         request["checkLookup"] = json.dumps(check_lookup)
     return backend_json_call("checks/projects/get-metric-history",
                              data=request)
Beispiel #19
0
    def start(self):
        """
        Start the execution of the step.

        On success, ``self.future_id`` is set to the 'jobId' returned by the backend.

        :raises Exception: if ``self.future_id`` is already set
        """
        if self.future_id is not None:
            raise Exception("Step already started")
        # future_id is necessarily None at this point; both fields are reset explicitly
        self.future_id = self.result = None

        request = {"stepData": json.dumps(self.step)}
        step_future = backend_json_call("scenarios/run-step/",
                                        data=request,
                                        err_msg="Step failed to start")
        self.future_id = step_future['jobId']
Beispiel #20
0
 def get_metric_history(self, metric_lookup):
     """
     Fetch every value a given metric took on this project

     :param metric_lookup: metric name or unique identifier; a value that is not a string is sent
                           JSON-encoded
     :returns: the result of the backend call
     """
     request = {"projectKey": self.project_key}
     # NOTE(review): `unicode` is only a builtin on Python 2 - presumably aliased elsewhere in the
     # module for Python 3; confirm. It is only evaluated when the lookup is not a `str`.
     if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
         request["metricLookup"] = metric_lookup
     else:
         request["metricLookup"] = json.dumps(metric_lookup)
     return backend_json_call("metrics/projects/get-metric-history",
                              data=request)
Beispiel #21
0
    def save_external_metric_values(self, values_dict):
        """
        Store metric values on this folder. The metrics are saved with the type "external"

        :param values_dict: the values to save, as a dict (sent JSON-encoded). The keys of the dict
                            are used as metric names
        :returns: the result of the backend call
        """
        payload = {
            "projectKey": self.project_key,
            "folderId": self.get_id(),
            "data": json.dumps(values_dict)
        }
        return backend_json_call("metrics/managed-folders/save-external-values",
                                 data=payload,
                                 err_msg="Failed to save external metric values")
Beispiel #22
0
    def get_version_metrics(self, version_id):
        """
        Fetch the training metrics of a version of this model, as a :class:`.SavedModelVersionMetrics`

        :param version_id: the unique identifier of the version for which to retrieve metrics
        """
        request = {
            "projectKey": self.project_key,
            "modelId": self.get_id(),
            "modelVersionId": version_id
        }
        raw_values = backend_json_call(
            "metrics/saved-models/get-values-for-version", data=request)
        computed = metrics.ComputedMetrics(raw_values)
        return SavedModelVersionMetrics(computed)
Beispiel #23
0
    def save_external_check_values(self, values_dict, partition=''):
        """
        Store check values on this dataset. The checks are saved with the type "external"

        :param values_dict: the values to save, as a dict (sent JSON-encoded). The keys of the dict
                            are used as check names
        :param partition: sent to the backend as "partitionId" (empty string by default)
        :returns: the result of the backend call
        """
        payload = {
            "projectKey": self.project_key,
            "datasetName": self.short_name,
            "partitionId": partition,
            "data": json.dumps(values_dict)
        }
        return intercom.backend_json_call(
            "checks/datasets/save-external-values",
            data=payload,
            err_msg="Failed to save external check values")
Beispiel #24
0
    def save_external_metric_values(self, values_dict, partition=''):
        """
        Store metric values on this dataset. The metrics are saved with the type "external"

        :param values_dict: the values to save, as a dict (sent JSON-encoded). The keys of the dict
                            are used as metric names
        :param partition: optionally, the partition for which the values are to be saved
                          (sent as "partitionId")
        :returns: the result of the backend call
        """
        payload = {
            "projectKey": self.project_key,
            "datasetName": self.short_name,
            "partitionId": partition,
            "data": json.dumps(values_dict)
        }
        return intercom.backend_json_call(
            "metrics/datasets/save-external-values",
            data=payload,
            err_msg="Failed to save external metric values")
Beispiel #25
0
    def is_done(self):
        """
        Tell whether a running step has finished.

        When the backend reports a result, it is stored in ``self.result``.

        :returns: True if the step future has a result, False otherwise
        :raises Exception: if the step was not started (``self.future_id`` is None)
        """
        if self.future_id is None:
            raise Exception("Step not started")

        request = {"futureId": self.future_id}
        step_future = backend_json_call("futures/get-update",
                                        data=request,
                                        err_msg="Failed to track step future")

        if not step_future['hasResult']:
            return False

        self.result = step_future['result']
        return True
Beispiel #26
0
    def get_model_folder(self, version_id=None):
        """
        Return the "model_folder" entry of the backend's details for a version of the saved model.

        :param version_id: the version to look up; when None, the "versionId" of the first version
                           flagged "active" in ``self.saved_model.list_versions()`` is used
        :raises IndexError: if version_id is None and no version is flagged active
        """
        if version_id is None:
            active_versions = [
                version for version in self.saved_model.list_versions()
                if version["active"]
            ]
            version_id = active_versions[0]["versionId"]

        request = {
            "projectKey": self.project_key,
            "smId": self.saved_model.get_id(),
            "versionId": version_id
        }
        details = backend_json_call("savedmodels/get-model-details",
                                    data=request)

        return details["model_folder"]
Beispiel #27
0
 def get_metric_history(self, metric_lookup, partition=''):
     """
     Fetch every value a given metric took on this folder

     :param metric_lookup: metric name or unique identifier; a value that is not a string is sent
                           JSON-encoded
     :param partition: accepted, but not forwarded to the backend by this method
     :returns: the result of the backend call
     """
     request = {
         "projectKey": self.project_key,
         "folderId": self.get_id(),
     }
     # NOTE(review): `unicode` is only a builtin on Python 2 - presumably aliased elsewhere in the
     # module for Python 3; confirm. It is only evaluated when the lookup is not a `str`.
     if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
         request["metricLookup"] = metric_lookup
     else:
         request["metricLookup"] = json.dumps(metric_lookup)
     return backend_json_call("metrics/managed-folders/get-metric-history",
                              data=request)
    def run(self, progress_callback):
        """
        Tag every project of ``self.project_keys`` with the connections it uses.

        For each project, connection names are collected from the 'params' -> 'connection' field of
        its dataset definitions and from the 'connection' field of its SQL notebooks. One tag
        "<prefix><connection>" per collected connection is then added to the project metadata.

        Entries read from ``self.config``:
        - "clobber" (default False): when truthy, existing tags starting with the prefix are
          dropped before the new tags are added
        - "prefix": prepended to each connection name to build the tag

        :param progress_callback: called after each project with the number of projects handled so far
        """
        clobber = self.config.get("clobber", False)
        prefix = self.config.get("prefix")
        # SQL notebook connections of the form "@virtual(<x>):<y>" are mapped to "hive-<y>"
        virtual_connection = re.compile(r'@virtual\(([^\)]+)\):(.*)')

        done = 0
        for project_key in self.project_keys:
            project = self.client.get_project(project_key)

            # collected per project, so that a project is only tagged with its own connections
            connections = set()

            for dataset_name in Dataset.list(project_key=project_key):
                d = project.get_dataset(dataset_name)
                connection_name = d.get_definition().get('params', {}).get(
                    'connection', None)
                if connection_name is not None:
                    connections.add(connection_name)

            sql_notebooks = intercom.backend_json_call(
                "sql-notebooks/list/", data={"projectKey": project_key})
            for sql_notebook in sql_notebooks:
                connection_name = sql_notebook.get('connection', None)
                if connection_name is not None:
                    m = virtual_connection.search(connection_name)
                    if m is not None:
                        connection_name = 'hive-%s' % m.group(2)

                    connections.add(connection_name)

            meta = project.get_metadata()

            # Update tags list
            if clobber:
                tags = [x for x in meta["tags"] if not x.startswith(prefix)]
            else:
                # copy, so that the metadata is only changed through the assignment below
                tags = list(meta["tags"])
            # sorted for a deterministic tag order; tags already present are not duplicated
            for connection in sorted(connections):
                tag = "%s%s" % (prefix, connection)
                if tag not in tags:
                    tags.append(tag)

            meta["tags"] = tags
            project.set_metadata(meta)

            done += 1
            progress_callback(done)
Beispiel #29
0
    def __init__(self,
                 connection,
                 query,
                 pre_queries=None,
                 post_queries=None,
                 find_connection_from_dataset=False,
                 db_type='sql',
                 extra_conf={},
                 timeOut=600000,
                 script_steps=None,
                 script_input_schema=None,
                 script_output_schema=None):
        """
        Start a streaming SQL query on the backend and keep the session in ``self.streamingSession``.

        pre_queries, post_queries and extra_conf are always sent JSON-encoded; script_steps and
        the two script schemas are sent JSON-encoded when given, and as None otherwise. The
        remaining arguments are sent as they are.

        The default ``extra_conf`` dict is shared between calls, but it is only serialized here,
        never mutated.
        """

        def _json_or_none(value):
            # optional fields stay None instead of becoming the string "null"
            return json.dumps(value) if value is not None else None

        data = {
            "connection": connection,
            "query": query,
            "preQueries": json.dumps(pre_queries),
            "postQueries": json.dumps(post_queries),
            "findConnectionFromDataset": find_connection_from_dataset,
            "timeOut": timeOut,
            "dbType": db_type,
            "extraConf": json.dumps(extra_conf),
            "scriptSteps": _json_or_none(script_steps),
            "scriptInputSchema": _json_or_none(script_input_schema),
            "scriptOutputSchema": _json_or_none(script_output_schema)
        }
        logging.info("Starting SQL query reader")

        # initiate the streaming (blocks until the database says it's ready to return values)
        self.streamingSession = backend_json_call("sql-queries/start-streaming",
                                                  data=data)

        logging.info("Got initial SQL query response")
Beispiel #30
0
    def get_metric_history(self, metric_lookup, partition=''):
        """
        Fetch every value a given metric took on this dataset

        :param metric_lookup: metric name or unique identifier; a value that is not a string is
                              sent JSON-encoded
        :param partition: optionally, the partition for which the values are to be fetched
        :returns: the result of the backend call
        """
        request = {
            "projectKey": self.project_key,
            "datasetName": self.short_name,
            "partition": partition,
        }
        # NOTE(review): `unicode` is only a builtin on Python 2 - presumably aliased elsewhere in
        # the module for Python 3; confirm. It is only evaluated when the lookup is not a `str`.
        if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
            request["metricLookup"] = metric_lookup
        else:
            request["metricLookup"] = json.dumps(metric_lookup)
        return intercom.backend_json_call(
            "metrics/datasets/get-metric-history",
            data=request,
            err_msg="Failed to get metric history")