Exemple #1
0
def send_files(execution_id, files, path=None):
    """Archive the files matching ``files`` and upload them as one tarball.

    Every entry of ``files`` is expanded with ``glob.glob``; the matches are
    packed into a temporary gzipped tar archive which is then posted to the
    ``containers/put-file`` endpoint with ``expand`` set to ``'true'``.

    :param execution_id: value sent as the ``executionId`` request parameter
    :param files: iterable of glob patterns selecting the paths to send
    :param path: value sent as the ``path`` request parameter; defaults to
        ``"_dku_send.tgz"``
    :raises Exception: if ``tar`` exits with a non-zero status (this includes
        the case where no path matched and there is nothing to archive)
    """
    # Imported locally so the function does not rely on the module header.
    import subprocess
    import tempfile

    if path is None:
        path = "_dku_send.tgz"
    logging.info("Sending %d globs to %s" % (len(files), path))
    all_files = []
    for g in files:
        all_files.extend(glob.glob(g))
    logging.info("Sending files to %s: %s" % (path, all_files))

    # mkstemp creates the file atomically with an unpredictable name, unlike
    # a hand-rolled random suffix in a world-writable directory.
    fd, tmp_archive = tempfile.mkstemp(prefix='_dku_send.',
                                       suffix='.tgz',
                                       dir='/tmp')
    os.close(fd)
    try:
        # An argv list (no shell) keeps paths containing spaces, quotes or
        # shell metacharacters intact; '--' stops a path starting with '-'
        # from being parsed as a tar option.
        # NOTE: error_code is now tar's exit status rather than the raw
        # wait status that os.system() used to return.
        error_code = subprocess.call(['tar', 'czf', tmp_archive, '--'] +
                                     all_files)
        if error_code != 0:
            raise Exception(
                "Error compressing %d paths, return code was %d" %
                (len(all_files), error_code))
        with open(tmp_archive, 'rb') as f:
            intercom.jek_or_backend_void_call(
                'containers/put-file',
                params={
                    'executionId': execution_id,
                    'fileKind': 'CONTEXT_DIR',
                    'path': path,
                    'expand': 'true'
                },
                files={'file': ('_dku_send.tgz', f, 'application/gzip')})
    finally:
        # Always drop the temporary archive, including when tar or the
        # upload fails.
        os.remove(tmp_archive)
Exemple #2
0
 def upload_call(g):
     """Post ``g`` as the request body of ``managed-folders/upload-path``.

     The project key, folder id and target path are read from the ``self``
     object of the enclosing scope.
     """
     destination = {
         "projectKey": self.project_key,
         "lookup": self.folder_id,
         "path": self.path
     }
     jek_or_backend_void_call("managed-folders/upload-path",
                              params=destination,
                              data=g)
Exemple #3
0
def handle_subprocess_return_value(execution_id,
                                   error_code,
                                   path='error.json',
                                   send_error_json=True,
                                   fail_if_subprocess_failed=False):
    """Decode a subprocess wait status, log it and report failures.

    ``error_code`` is decoded with the ``os.WIF*`` helpers into an exit
    status or a signal number, and the outcome is logged. When error
    reporting is enabled, the local ``error.json`` file (created here if the
    subprocess failed without writing one) is uploaded through
    ``containers/put-file`` under ``path``. Upload failures are logged, not
    raised.

    :param execution_id: value sent as the ``executionId`` request parameter
    :param error_code: raw wait status of the subprocess
    :param path: remote path under which ``error.json`` is stored
    :param send_error_json: whether to upload ``error.json``; forced to True
        when the process was terminated by signal 9
    :param fail_if_subprocess_failed: when True and the decoded code is
        non-zero, terminate the current process with ``sys.exit``
    """
    local_error_file = 'error.json'

    # Translate the raw wait status into (description, code).
    if os.WIFEXITED(error_code):
        outcome, error_code = "exited with status", os.WEXITSTATUS(error_code)
    elif os.WIFSTOPPED(error_code):
        outcome, error_code = "stopped by signal", os.WSTOPSIG(error_code)
    elif os.WIFSIGNALED(error_code):
        outcome, error_code = "terminated by signal", os.WTERMSIG(error_code)
        if error_code == 9:
            # A killed process presumably could not report a structured
            # error itself, so make sure one is sent.
            send_error_json = True
    else:
        outcome = "finished with code"

    failed = error_code != 0
    summary = "Containerized process %s %d" % (outcome, error_code)
    report = logging.error if failed else logging.info
    report(summary)

    if send_error_json:
        if failed and not os.path.isfile(local_error_file):
            # The subprocess left no error file behind: craft a generic one.
            message = "Containerized process execution failed, return code %d" % error_code
            if error_code == 9:
                message += " (killed - maybe out of memory?)"
            with open(local_error_file, 'w') as fd:
                json.dump(
                    {
                        "errorType": safe_unicode_str("SubProcessFailed"),
                        "message": safe_unicode_str(message)
                    }, fd)

        if os.path.isfile(local_error_file):
            logging.info("Sending error.json to backend/JEK")
            upload_params = {
                'executionId': execution_id,
                'fileKind': 'EXECUTION_DIR',
                'path': path
            }
            try:
                with open(local_error_file, 'rb') as f:
                    intercom.jek_or_backend_void_call(
                        'containers/put-file',
                        params=upload_params,
                        files={'file': ('error.json', f, 'application/json')})
            except Exception as e:
                logging.error("Could not send error to backend: %s" % e)

    if not (failed and fail_if_subprocess_failed):
        return

    logging.error("Failing container because subprocess failed (code %s)" %
                  error_code)
    sys.exit(error_code)
Exemple #4
0
def send_file(execution_id, file, path=None):
    """Upload a single local file through ``containers/put-file``.

    The file is sent with ``fileKind`` ``CONTEXT_DIR`` and ``expand`` set to
    ``'false'``.

    :param execution_id: value sent as the ``executionId`` request parameter
    :param file: local path of the file to read and upload
    :param path: value sent as the ``path`` request parameter; defaults to
        ``file``
    """
    destination = file if path is None else path
    logging.info("Sending %s to %s" % (file, destination))

    request_params = {
        'executionId': execution_id,
        'fileKind': 'CONTEXT_DIR',
        'path': destination,
        'expand': 'false'
    }
    with open(file, 'rb') as stream:
        intercom.jek_or_backend_void_call(
            'containers/put-file',
            params=request_params,
            files={'file': (file, stream, 'application/gzip')})
Exemple #5
0
 def clear_path(self, path):
     """Remove a file or directory from the folder.

     :param path: path, inside the folder, of the entry to remove
     :return: whatever the ``managed-folders/clear-path`` call returns
     """
     request = {
         "projectKey": self.project_key,
         "lookup": self.short_name,
         "path": path
     }
     return jek_or_backend_void_call("managed-folders/clear-path", request)
Exemple #6
0
 def clear_partition(self, partition):
     """Remove all files from one partition of the folder.

     :param partition: identifier of the partition to clear
     :return: whatever the ``managed-folders/clear-partition`` call returns
     """
     request = {
         "projectKey": self.project_key,
         "lookup": self.short_name,
         "partition": partition
     }
     return jek_or_backend_void_call("managed-folders/clear-partition",
                                     request)
Exemple #7
0
    def write_metadata(self, meta):
        """Write the dataset metadata object.

        ``meta`` must contain the ``checklists``, ``custom`` and ``tags``
        keys; it is JSON-serialized and posted to ``datasets/write-metadata``
        together with the dataset's full name.

        :param meta: metadata object to store
        :raises Exception: if one of the required keys is missing from
            ``meta`` (the first missing key, in the order above, is reported)
        """

        #if not self.writable:
        #    raise Exception("You cannot write the metadata for the dataset %s, "
        #                    "as it is not declared as an output" % self.name)

        for required_key in ("checklists", "custom", "tags"):
            if required_key not in meta:
                raise Exception("'%s' is missing" % required_key)

        payload = {
            "fullDatasetName": self.full_name,
            "metadata": json.dumps(meta)
        }
        intercom.jek_or_backend_void_call("datasets/write-metadata",
                                          data=payload)
Exemple #8
0
    def write_schema(self, columns, dropAndCreate=False):
        """Validate ``columns`` and send them as the new dataset schema.

        Sometimes the schema of a dataset being written is known only by
        the code of the Python script itself; this lets the script set the
        schema. Use with caution.

        Each column is checked, in order, for a ``type`` key, a ``name``
        key, a string ``name`` and a string ``type``. The schema is then
        posted to ``datasets/set-schema/`` with ``userModified`` set to
        False.

        :param columns: list of dicts like
            ``{'name': 'column name', 'type': 'column type'}``
        :param dropAndCreate: forwarded as the ``dropAndCreate`` request field
        :raises Exception: if the dataset is not writable, or if a column is
            missing ``name``/``type`` or has a non-string value for either
        """
        if not self.writable:
            raise Exception("You cannot write the schema for the dataset %s, "
                            "as it is not declared as an output" % self.name)

        for column in columns:
            # Presence checks first ('type' then 'name'), then the string
            # checks ('name' then 'type').
            for attribute in ("type", "name"):
                if attribute not in column:
                    raise Exception("Columns %s has no attribute %s" %
                                    (str(column), attribute))
            for attribute in ("name", "type"):
                if not isinstance(column[attribute],
                                  base.dku_basestring_type):
                    raise Exception(
                        "Columns %s %s attribute is not a string" %
                        (str(column), attribute))

        schema_json = json.dumps({"userModified": False, "columns": columns})
        payload = {
            "fullDatasetName": self.full_name,
            "schemaData": schema_json,
            "dropAndCreate": dropAndCreate
        }
        intercom.jek_or_backend_void_call("datasets/set-schema/", data=payload)
Exemple #9
0
    def upload_stream(self, path, f):
        """
        Upload the content of a file-like object to a path in the managed folder.
        If the file already exists, it will be replaced.

        .. code-block:: python

            # This copies a local file to the managed folder
            with open("local_file_to_upload") as f:
                folder.upload_stream("name_of_file_in_folder", f)

        :param str path: Target path of the file to write in the managed folder
        :param f: file-like object open for reading
        :return: whatever the ``managed-folders/upload-path`` call returns
        """
        destination = {
            "projectKey": self.project_key,
            "lookup": self.short_name,
            "path": path
        }
        return jek_or_backend_void_call("managed-folders/upload-path",
                                        params=destination,
                                        data=f)
Exemple #10
0
                    "./remote-run-env-def.json")
        fetch_dir(execution_id, 'splits')

        logging.info("Running doctor server")
        run_subprocess(execution_id,
                       '%s -m dataiku.container.exec_doctor_train' %
                       python_bin,
                       path="%s/error.json" % definition['workDir'],
                       send_error_json=False,
                       fail_if_subprocess_failed=True)

        try:
            intercom.jek_or_backend_void_call(
                'containers/put-file',
                params={
                    'executionId': execution_id,
                    'fileKind': 'CONTEXT_DIR',
                    'path': '%s/container_done.txt' % definition['workDir']
                },
                files={'file': ('container_done.txt', '1', 'text/plain')})
        except Exception as e:
            logging.error("Could not send result: %s" % e)

    elif execution['type'] == 'RECIPE_PREDICTION_TRAIN_PYTHON' or execution[
            'type'] == 'RECIPE_CLUSTERING_TRAIN_PYTHON':
        logging.info("Training doctor model")

        fetch_libs(execution_id, 'project')
        fetch_libs(execution_id, 'instance')

        fetch_dir(execution_id, '', 'model')
        os.mkdir('selection')  # Unused for now
Exemple #11
0
 def push_data(self, id, generator):
     """Stream ``generator`` as the body of a ``datasets/push-data/`` call.

     :param id: value sent as the ``id`` request parameter
     :param generator: data passed as the request body
     """
     query = {"id": id}
     jek_or_backend_void_call(
         "datasets/push-data/",
         params=query,
         data=generator,
         err_msg="Streaming: push-data call failed")