Example #1
def _get_experiments_dir():
    """
    Gets the root folder where the experiments are writing their results

    Returns:
        The folder where the experiments are writing results
    """
    assert hdfs.exists(
        hdfs.project_path() + "Experiments"
    ), "Your project is missing a dataset named Experiments, please create it."
    return hdfs.project_path() + "Experiments"
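
A minimal usage sketch of the helper above (hypothetical caller; assumes the hops hdfs module and its ls() helper are available, as in the other examples on this page):

# Hypothetical usage: list what the experiments have written so far.
experiments_dir = _get_experiments_dir()
for entry in hdfs.ls(experiments_dir):
    print(entry)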
Example #2
def export(local_model_path, model_name, model_version):

    project_path = hdfs.project_path()

    # Create directory with model name
    hdfs_handle = hdfs.get()
    model_name_root_directory = project_path + '/Models/' + str(model_name) + '/' + str(model_version) + '/'
    hdfs_handle.create_directory(model_name_root_directory)

    for (path, dirs, files) in os.walk(local_model_path):

        hdfs_export_subpath = path.replace(local_model_path, '')

        current_hdfs_dir = model_name_root_directory + '/' + hdfs_export_subpath

        if not hdfs_handle.exists(current_hdfs_dir):
            hdfs_handle.create_directory(current_hdfs_dir)

        for f in files:
            if not hdfs_handle.exists(current_hdfs_dir + '/' + f):
                pydoop.hdfs.put(path + '/' + f, current_hdfs_dir)

        for d in dirs:
            if not hdfs_handle.exists(current_hdfs_dir + '/' + d):
                pydoop.hdfs.put(path + '/' + d, current_hdfs_dir + '/')
        break
Example #3
File: util.py Project: tabularaza27/maggy
def _finalize_experiment(
    experiment_json,
    metric,
    app_id,
    run_id,
    state,
    duration,
    logdir,
    best_logdir,
    optimization_key,
):
    """Attaches the experiment outcome as xattr metadata to the app directory.
    """
    outputs = _build_summary_json(logdir)

    if outputs:
        hopshdfs.dump(outputs, logdir + "/.summary.json")

    if best_logdir:
        experiment_json["bestDir"] = best_logdir[len(hopshdfs.project_path()):]
    experiment_json["optimizationKey"] = optimization_key
    experiment_json["metric"] = metric
    experiment_json["state"] = state
    experiment_json["duration"] = duration

    experiment_utils._attach_experiment_xattr(app_id, run_id, experiment_json,
                                              "REPLACE")
Example #4
def _handle_return_simple(retval, hdfs_exec_logdir, logfile):
    """

    Args:
        val:
        hdfs_exec_logdir:

    Returns:

    """
    return_file = hdfs_exec_logdir + '/.outputs.json'

    if not retval:
        if logfile is not None:
            retval = {'log': logfile}
            hdfs.dump(dumps(retval), return_file)
        return

    _upload_file_output(retval, hdfs_exec_logdir)

    # Wrap non-dict return values in a dict so they can be serialized uniformly
    if type(retval) is not dict:
        retval = {'metric': retval}

    retval['log'] = hdfs_exec_logdir.replace(hdfs.project_path(),
                                             '') + '/output.log'

    hdfs.dump(dumps(retval), return_file)
Example #5
def _init_logger(exec_logdir, role=None, index=None):
    """
    Initialize the logger by opening the log file and pointing the global fd to the open file
    """

    prefix = ''
    if role is not None and index is not None:
        prefix = str(role) + '_' + str(index) + '_'

    logfile = exec_logdir + '/' + prefix + 'output.log'
    fs_handle = hdfs.get_fs()
    global logger_fd
    try:
        logger_fd = fs_handle.open_file(logfile, mode='w')
    except TypeError:
        # fall back to the 'flags' keyword if this pydoop build's open_file() does not accept 'mode'
        logger_fd = fs_handle.open_file(logfile, flags='w')

    # save the builtin print
    original_print = __builtin__.print

    def experiment_print(*args, **kwargs):
        """Experiments custom print() function."""
        log(' '.join(str(x) for x in args))
        original_print(*args, **kwargs)

    # override the builtin print
    __builtin__.print = experiment_print

    return logfile.replace(hdfs.project_path(), '')
Example #6
def _version_resources(versioned_resources, rundir):
    """

    Args:
        versioned_resources:
        rundir:

    Returns:

    """
    if not versioned_resources:
        return None
    pyhdfs_handle = hdfs.get()
    pyhdfs_handle.create_directory(rundir)
    endpoint_prefix = hdfs.project_path()
    versioned_paths = []
    for hdfs_resource in versioned_resources:
        if pydoop.hdfs.path.exists(hdfs_resource):
            log("Versoning resource '%s' in rundir '%s'" %
                (hdfs_resource, rundir))

            # Remove the file if it exists
            target_path = os.path.join(rundir, os.path.basename(hdfs_resource))
            if hdfs.exists(target_path):
                hdfs.rmr(target_path)

            hdfs.cp(hdfs_resource, rundir)
            path, filename = os.path.split(hdfs_resource)
            versioned_paths.append(
                rundir.replace(endpoint_prefix, '') + '/' + filename)
        else:
            log("Resource not found '%s'" % hdfs_resource, level='warning')
            #raise Exception('Could not find resource in specified path: ' + hdfs_resource)

    return ', '.join(versioned_paths)
Example #7
def _version_resources(versioned_resources, rundir):
    """

    Args:
        versioned_resources:
        rundir:

    Returns:

    """
    if not versioned_resources:
        return None
    pyhdfs_handle = hdfs.get()
    pyhdfs_handle.create_directory(rundir)
    endpoint_prefix = hdfs.project_path()
    versioned_paths = []
    for hdfs_resource in versioned_resources:
        if pydoop.hdfs.path.exists(hdfs_resource):
            pyhdfs_handle.copy(hdfs_resource, pyhdfs_handle, rundir)
            path, filename = os.path.split(hdfs_resource)
            versioned_paths.append(
                rundir.replace(endpoint_prefix, '') + '/' + filename)
        else:
            raise Exception('Could not find resource in specified path: ' +
                            hdfs_resource)

    return ', '.join(versioned_paths)
Example #8
def _create_experiment_subdirectories(app_id,
                                      run_id,
                                      param_string,
                                      type,
                                      sub_type=None,
                                      params=None):
    """
    Creates directories for an experiment, if Experiments folder exists it will create directories
    below it, otherwise it will create them in the Logs directory.

    Args:
        :app_id: YARN application ID of the experiment
        :run_id: Experiment ID
        :param_string: name of the new directory created under parent directories
        :type: type of the new directory parent, e.g. differential_evolution
        :sub_type: type of subdirectory under the parent, e.g. generation
        :params: dict of hyperparameters

    Returns:
        The new directories for the yarn-application and for the execution (hdfs_exec_logdir, hdfs_appid_logdir)
    """

    pyhdfs_handle = hdfs.get()

    hdfs_events_parent_dir = hdfs.project_path() + "Experiments"

    hdfs_experiment_dir = hdfs_events_parent_dir + "/" + app_id + "_" + str(
        run_id)

    # determine directory structure based on arguments
    if sub_type:
        hdfs_exec_logdir = hdfs_experiment_dir + "/" + str(
            sub_type) + '/' + str(param_string)
        if pyhdfs_handle.exists(hdfs_exec_logdir):
            hdfs.delete(hdfs_exec_logdir, recursive=True)
    elif not param_string and not sub_type:
        if pyhdfs_handle.exists(hdfs_experiment_dir):
            hdfs.delete(hdfs_experiment_dir, recursive=True)
        hdfs_exec_logdir = hdfs_experiment_dir + '/'
    else:
        hdfs_exec_logdir = hdfs_experiment_dir + '/' + str(param_string)
        if pyhdfs_handle.exists(hdfs_exec_logdir):
            hdfs.delete(hdfs_exec_logdir, recursive=True)

    # Need to remove directory if it exists (might be a task retry)

    # create the new directory
    pyhdfs_handle.create_directory(hdfs_exec_logdir)

    return_file = hdfs_exec_logdir + '/.hparams.json'
    hdfs.dump(dumps(params), return_file)

    return hdfs_exec_logdir, hdfs_experiment_dir
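
A hedged usage sketch of the helper above; all argument values are illustrative only:

# Illustrative call: creates Experiments/<app_id>_<run_id>/generation.0/lr=0.01
# in the project and dumps the hyperparameters to .hparams.json inside it.
exec_logdir, experiment_dir = _create_experiment_subdirectories(
    app_id='application_1600000000000_0001',  # example YARN application id
    run_id=1,
    param_string='lr=0.01',
    type='differential_evolution',
    sub_type='generation.0',
    params={'lr': 0.01})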
Example #9
def _upload_file_output(retval, hdfs_exec_logdir):
    if type(retval) is dict:
        for metric_key in retval.keys():
            value = str(retval[metric_key])
            if '/' in value or os.path.exists(os.getcwd() + '/' + value):
                if os.path.exists(value):  # absolute path
                    if hdfs.exists(hdfs_exec_logdir + '/' +
                                   value.split('/')[-1]):
                        hdfs.delete(hdfs_exec_logdir + '/' +
                                    value.split('/')[-1],
                                    recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(value)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + value.split('/')[-1]
                elif os.path.exists(os.getcwd() + '/' +
                                    value):  # relative path
                    output_file = os.getcwd() + '/' + value
                    if hdfs.exists(hdfs_exec_logdir + '/' + value):
                        hdfs.delete(hdfs_exec_logdir + '/' + value,
                                    recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(output_file)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + output_file.split('/')[-1]
                elif value.startswith('Experiments') and value.endswith(
                        'output.log'):
                    continue
                elif value.startswith('Experiments') and hdfs.exists(
                        hdfs.project_path() + '/' + value):
                    hdfs.cp(hdfs.project_path() + '/' + value,
                            hdfs_exec_logdir)
                else:
                    raise Exception(
                        'Could not find file or directory on path ' +
                        str(value))
Example #10
def _get_metric(param_string, app_id, generation_id, run_id):
    project_path = hopshdfs.project_path()
    handle = hopshdfs.get()
    for i in range(generation_id):
        possible_result_path = hopshdfs.get_experiments_dir() + '/' + app_id + '/differential_evolution/run.' \
                               + str(run_id) + '/generation.' + str(i) + '/' + param_string + '/metric'
        if handle.exists(possible_result_path):
            with pydoop.hdfs.open(possible_result_path, "r") as fi:
                metric = float(fi.read())
                fi.close()
                return metric

    return None
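
A hedged sketch of how this lookup might be called; the parameter string and ids below are assumptions for illustration:

# Illustrative call: search the generations evaluated so far for a stored metric.
metric = _get_metric('learning_rate=0.1&dropout=0.5',
                     app_id='application_1600000000000_0001',
                     generation_id=3,
                     run_id=1)
if metric is not None:
    print('found metric:', metric)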
Example #11
def _finalize_experiment(experiment_json, metric, app_id, run_id, state,
                         duration, logdir, bestLogdir, optimization_key):

    summary_file = _build_summary_json(logdir)

    if summary_file:
        hdfs.dump(summary_file, logdir + '/.summary.json')

    if bestLogdir:
        experiment_json['bestDir'] = bestLogdir[len(hdfs.project_path()):]
    experiment_json['optimizationKey'] = optimization_key
    experiment_json['metric'] = metric
    experiment_json['state'] = state
    experiment_json['duration'] = duration

    _attach_experiment_xattr(app_id, run_id, experiment_json, 'REPLACE')
Example #12
File: util.py Project: tabularaza27/maggy
def _handle_return_val(return_val, log_dir, optimization_key, log_file):
    """Handles the return value of the user defined training function.
    """
    experiment_utils._upload_file_output(return_val, log_dir)

    # Return type validation
    if not optimization_key:
        raise ValueError("Optimization key cannot be None.")
    if not return_val:
        raise exceptions.ReturnTypeError(optimization_key, return_val)
    if not isinstance(return_val, constants.USER_FCT.RETURN_TYPES):
        raise exceptions.ReturnTypeError(optimization_key, return_val)
    if isinstance(return_val, dict) and optimization_key not in return_val:
        raise KeyError(
            "Returned dictionary does not contain optimization key with the "
            "provided name: {}".format(optimization_key))

    # validate that optimization metric is numeric
    if isinstance(return_val, dict):
        opt_val = return_val[optimization_key]
    else:
        opt_val = return_val
        return_val = {optimization_key: opt_val}

    if not isinstance(opt_val, constants.USER_FCT.NUMERIC_TYPES):
        raise exceptions.MetricTypeError(optimization_key, opt_val)

    # for key, value in return_val.items():
    #    return_val[key] = value if isinstance(value, str) else str(value)

    return_val["log"] = log_file.replace(hopshdfs.project_path(), "")

    return_file = log_dir + "/.outputs.json"
    hopshdfs.dump(json.dumps(return_val, default=json_default_numpy),
                  return_file)

    metric_file = log_dir + "/.metric"
    hopshdfs.dump(json.dumps(opt_val, default=json_default_numpy), metric_file)

    return opt_val
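
A sketch of how a training function's return value would flow through this helper; the dict and the log_dir/log_file variables below are assumptions for illustration:

# Assume the user function returned a dict containing the optimization metric,
# and that log_dir/log_file point to the trial's HDFS log directory and log file.
return_val = {'accuracy': 0.91, 'loss': 0.34}
opt_val = _handle_return_val(return_val, log_dir, 'accuracy', log_file)
# opt_val is now 0.91; .outputs.json and .metric were written to log_dir.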
Example #13
    """Download and parse MNIST dataset."""

    with tf.io.gfile.GFile(directory + images_file, 'rb') as f:
        images = extract_images(f)
        images = images.reshape(images.shape[0],
                                images.shape[1] * images.shape[2])
        images = images.astype(numpy.float32)
        images = numpy.multiply(images, 1.0 / 255.0)

    with tf.io.gfile.GFile(directory + labels_file, 'rb') as f:
        labels = extract_labels(f)

    return images, labels


directory = hdfs.project_path() + 'Resources/' + td_dir + '/'

train_images, train_labels = load_dataset(directory,
                                          'train-images-idx3-ubyte.gz',
                                          'train-labels-idx1-ubyte.gz')
test_images, test_labels = load_dataset(directory, 't10k-images-idx3-ubyte.gz',
                                        't10k-labels-idx1-ubyte.gz')

from pyspark.sql.types import *
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("create_mnist_td").getOrCreate()
data = [(train_images[i].tolist(), int(train_labels[i]))
        for i in range(len(train_images))]
schema = StructType([
    StructField("image", ArrayType(FloatType())),
Example #14
def export(model_path, model_name, model_version=1, overwrite=False):
    """
    Copies a trained model to the Models directory in the project and creates the directory structure of:

    >>> Models
    >>>      |
    >>>      - model_name
    >>>                 |
    >>>                 - version_x
    >>>                 |
    >>>                 - version_y

    For example if you run this:

    >>> serving.export("iris_knn.pkl", "irisFlowerClassifier", 1, overwrite=True)

    it will copy the local model file "iris_knn.pkl" to /Projects/projectname/Models/irisFlowerClassifier/1/iris_knn.pkl
    on HDFS, and overwrite in case there already exists a file with the same name in the directory.

    If you run:

    >>> serving.export("Resources/iris_knn.pkl", "irisFlowerClassifier", 1, overwrite=True)

    it will first check if the path Resources/iris_knn.pkl exists on your local filesystem in the current working
    directory. If the path was not found, it will check in your project's HDFS directory and if it finds the model there
    it will copy it to /Projects/projectname/Models/irisFlowerClassifier/1/iris_knn.pkl

    If "model" is a directory on the local path exported by tensorflow, and you run:
:
    >>> serving.export("/model/", "mnist", 1, overwrite=True)

    It will copy the model directory contents to /Projects/projectname/Models/mnist/1/, e.g. the "model.pb" file and
    the "variables" directory.

    Args:
        :model_path: path to the trained model (HDFS or local)
        :model_name: name of the model/serving
        :model_version: version of the model/serving
        :overwrite: boolean flag whether to overwrite in case a serving already exists in the exported directory

    Returns:
        The path to where the model was exported

    Raises:
        :ValueError: if there was an error with the export of the model due to invalid user input
    """

    if not hdfs.exists(model_path) and not os.path.exists(model_path):
        raise ValueError("the provided model_path: {} , does not exist in HDFS or on the local filesystem".format(
            model_path))

    # Create directory in HDFS to put the model files
    project_path = hdfs.project_path()
    model_dir_hdfs = project_path + constants.MODEL_SERVING.MODELS_DATASET + \
                     constants.DELIMITERS.SLASH_DELIMITER + str(model_name) + \
                     constants.DELIMITERS.SLASH_DELIMITER + str(model_version) + \
                     constants.DELIMITERS.SLASH_DELIMITER
    if not hdfs.exists(model_dir_hdfs):
        hdfs.mkdir(model_dir_hdfs)

    if (not overwrite) and hdfs.exists(model_dir_hdfs) and hdfs.isfile(model_dir_hdfs):
        raise ValueError("Could not create model directory: {}, the path already exists and is a file, "
                         "set flag overwrite=True "
                         "to remove the file and create the correct directory structure".format(model_dir_hdfs))

    if overwrite and hdfs.exists(model_dir_hdfs) and hdfs.isfile(model_dir_hdfs):
        hdfs.delete(model_dir_hdfs)
        hdfs.mkdir(model_dir_hdfs)


    # Export the model files
    if os.path.exists(model_path):
        return _export_local_model(model_path, model_dir_hdfs, overwrite)
    else:
        return _export_hdfs_model(model_path, model_dir_hdfs, overwrite)
Example #15
def get_logdir(app_id):
    global run_id
    return hopshdfs.project_path() + '/Logs/TensorFlow/' + app_id + \
        '/horovod/run.' + str(run_id)
Example #16
def export(model_path, model_name, model_version=None, overwrite=False, metrics=None, description=None, synchronous=True, synchronous_timeout=120):
    """
    Copies a trained model to the Models directory in the project and creates the directory structure of:

    >>> Models
    >>>      |
    >>>      - model_name
    >>>                 |
    >>>                 - version_x
    >>>                 |
    >>>                 - version_y

    For example if you run this:

    >>> from hops import model
    >>> model.export("iris_knn.pkl", "irisFlowerClassifier", metrics={'accuracy': accuracy})

    It will copy the local model file "iris_knn.pkl" to /Projects/projectname/Models/irisFlowerClassifier/1/iris_knn.pkl
    on HDFS, and overwrite in case there already exists a file with the same name in the directory.

    If "model" is a directory on the local path exported by TensorFlow, and you run:

    >>> model.export("/model", "mnist", metrics={'accuracy': accuracy, 'loss': loss})

    It will copy the model directory contents to /Projects/projectname/Models/mnist/1/, e.g. the "model.pb" file and
    the "variables" directory.

    Args:
        :model_path: path to the trained model (HDFS or local)
        :model_name: name of the model
        :model_version: version of the model
        :overwrite: boolean flag whether to overwrite in case a model already exists in the exported directory
        :metrics: dict of evaluation metrics to attach to model
        :description: description about the model
        :synchronous: whether to synchronously wait for the model to be indexed in the models rest endpoint
        :synchronous_timeout: max timeout in seconds for waiting for the model to be indexed

    Returns:
        The path to where the model was exported

    Raises:
        :ValueError: if there was an error with the export of the model due to invalid user input
        :ModelNotFound: if the model was not found
    """

    # Make sure model name is a string, users could supply numbers
    model_name = str(model_name)

    if not isinstance(model_path, string_types):
        model_path = model_path.decode()

    if not description:
        description = 'A collection of models for ' + model_name

    project_path = hdfs.project_path()

    assert hdfs.exists(project_path + "Models"), "Your project is missing a dataset named Models, please create it."

    if not hdfs.exists(model_path) and not os.path.exists(model_path):
        raise ValueError("the provided model_path: {} , does not exist in HDFS or on the local filesystem".format(
            model_path))

    # make sure metrics are numbers
    if metrics:
        _validate_metadata(metrics)

    model_dir_hdfs = project_path + constants.MODEL_SERVING.MODELS_DATASET + \
                     constants.DELIMITERS.SLASH_DELIMITER + model_name + constants.DELIMITERS.SLASH_DELIMITER

    if not hdfs.exists(model_dir_hdfs):
        hdfs.mkdir(model_dir_hdfs)
        hdfs.chmod(model_dir_hdfs, "ug+rwx")

    # User did not specify model_version, pick the current highest version + 1, set to 1 if no model exists
    version_list = []
    if not model_version and hdfs.exists(model_dir_hdfs):
        model_version_directories = hdfs.ls(model_dir_hdfs)
        for version_dir in model_version_directories:
            try:
                if hdfs.isdir(version_dir):
                    version_list.append(int(version_dir[len(model_dir_hdfs):]))
            except:
                pass
        if len(version_list) > 0:
            model_version = max(version_list) + 1

    if not model_version:
        model_version = 1

    # Path to directory in HDFS to put the model files
    model_version_dir_hdfs = model_dir_hdfs + str(model_version)

    # If version directory already exists and we are not overwriting it then fail
    if not overwrite and hdfs.exists(model_version_dir_hdfs):
        raise ValueError("Could not create model directory: {}, the path already exists, "
                         "set flag overwrite=True "
                         "to remove the version directory and create the correct directory structure".format(model_version_dir_hdfs))

    # Overwrite version directory by deleting all content (this is needed for Provenance to register Model as deleted)
    if overwrite and hdfs.exists(model_version_dir_hdfs):
        hdfs.delete(model_version_dir_hdfs, recursive=True)
        hdfs.mkdir(model_version_dir_hdfs)

    # At this point we can create the version directory if it does not exist
    if not hdfs.exists(model_version_dir_hdfs):
        hdfs.mkdir(model_version_dir_hdfs)

    # Export the model files
    if os.path.exists(model_path):
        export_dir=_export_local_model(model_path, model_version_dir_hdfs, overwrite)
    else:
        export_dir=_export_hdfs_model(model_path, model_version_dir_hdfs, overwrite)

    print("Exported model " + model_name + " as version " + str(model_version) + " successfully.")

    jobName=None
    if constants.ENV_VARIABLES.JOB_NAME_ENV_VAR in os.environ:
        jobName = os.environ[constants.ENV_VARIABLES.JOB_NAME_ENV_VAR]

    kernelId=None
    if constants.ENV_VARIABLES.KERNEL_ID_ENV_VAR in os.environ:
        kernelId = os.environ[constants.ENV_VARIABLES.KERNEL_ID_ENV_VAR]

    # Attach modelName_modelVersion to experiment directory
    model_summary = {'name': model_name, 'version': model_version, 'metrics': metrics,
                     'experimentId': None, 'description': description, 'jobName': jobName,
                     'kernelId': kernelId}
    if 'ML_ID' in os.environ:
        # Attach link from experiment to model
        experiment_utils._attach_model_link_xattr(os.environ['ML_ID'], model_name + '_' + str(model_version))
        # Attach model metadata to models version folder
        model_summary['experimentId'] = os.environ['ML_ID']
        experiment_utils._attach_model_xattr(model_name + "_" + str(model_version), experiment_utils.dumps(model_summary))
    else:
        experiment_utils._attach_model_xattr(model_name + "_" + str(model_version), experiment_utils.dumps(model_summary))

    # Model metadata is attached asynchronously by Epipe, so this is necessary to ensure that
    # subsequent steps in a pipeline will not fail
    if synchronous:
        start_time = time.time()
        sleep_seconds = 5
        for i in range(int(synchronous_timeout/sleep_seconds)):
            try:
                time.sleep(sleep_seconds)
                print("Polling " + model_name + " version " + str(model_version) + " for model availability.")
                resp = get_model(model_name, model_version)
                if resp.ok:
                    print("Model now available.")
                    return export_dir
                print(model_name + " not ready yet, retrying in " + str(sleep_seconds) + " seconds.")
            except ModelNotFound:
                pass
        print("Model not available during polling, set a higher value for synchronous_timeout to wait longer.")

    return export_dir
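
For reference, a typical call to this version of export might look like the sketch below (file name, metrics and description are illustrative):

# Hypothetical usage, mirroring the docstring above.
from hops import model

export_dir = model.export('iris_knn.pkl', 'irisFlowerClassifier',
                          metrics={'accuracy': 0.95},
                          description='kNN classifier trained on the Iris dataset',
                          overwrite=True)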
Example #17
def start_beam_jobserver(flink_session_name,
                         artifacts_dir="Resources",
                         jobserver_jar=None,
                         sdk_worker_parallelism=1):
    """
    Start the Java Beam job server that connects to the flink session cluster. User needs to provide the
    job name that started the Flink session and optionally the worker parallelism.

    Args:
      :flink_session_name: Job name that runs the Flink session.
      :artifacts_dir: Default dataset to store artifacts.
      :jobserver_jar: Portability framework jar filename.
      :sdk_worker_parallelism: Default parallelism for SDK worker processes. This option is only applied when the
      pipeline option sdkWorkerParallelism is set to 0. Default is 1. If 0, worker parallelism will be decided
      dynamically by the runner. For further documentation, please refer to the Apache Beam docs.
    Returns:
        artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    if jobserver_jar is None:
        jobserver_jar = os.path.join(
            util.get_flink_conf_dir(),
            "beam-runners-flink-1.8-job-server-2.15.0.jar")
    # Get Flink master URL (flink session cluster) from an ExecutionDTO
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"

    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + flink_session_name + \
                          "-" + str(job_port) + ".log"
    # launch the job server, redirecting its stdout/stderr to the log file
    with open(beam_jobserver_log, "wb") as out, open(beam_jobserver_log,
                                                     "wb") as err:
        jobserver = subprocess.Popen(
            [
                "java", "-jar", jobserver_jar,
                "--artifacts-dir=%s" % hopsfs.project_path() + artifacts_dir,
                "--flink-master-url=%s" % flink_master_url,
                "--artifact-port=%d" % artifact_port,
                "--expansion-port=%d" % expansion_port,
                "--job-host=%s" % job_host,
                "--job-port=%d" % job_port,
                "--sdk-worker-parallelism=%d" % sdk_worker_parallelism
            ],
            stdout=out,
            stderr=err,
            preexec_fn=util._on_executor_exit('SIGTERM'))
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {
        "jobserver_log": beam_jobserver_log,
        "artifact_port": artifact_port,
        "expansion_port": expansion_port,
        "job_host": job_host,
        "job_port": job_port,
        "jobserver.pid": jobserver.pid
    }
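
A hedged usage sketch; the Flink session name is an assumption for illustration:

# Start a Beam job server against an already-running Flink session job
# and print where it is listening.
jobserver = start_beam_jobserver('flink_session', sdk_worker_parallelism=1)
print('Beam job server listening on %s:%d' %
      (jobserver['job_host'], jobserver['job_port']))
print('Job server log: ' + jobserver['jobserver_log'])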
Example #18
def start_beam_jobserver(
    flink_session_name,
    artifacts_dir="Resources",
    jobserver_jar=os.path.join(util.get_flink_lib_dir(),
                               "beam-runners-flink-1.9-job-server-2.24.0.jar"),
    jobserver_main_class="org.apache.beam.runners.flink.FlinkJobServerDriver",
    service_discover_jar=os.path.join(
        util.get_flink_lib_dir(),
        "service-discovery-client-0.5-SNAPSHOT.jar")):
    """
    Start the Java Beam job server that connects to the flink session cluster. User needs to provide the
    job name that started the Flink session and optionally the worker parallelism.

    Args:
      :flink_session_name: Job name that runs the Flink session.
      :artifacts_dir: Default dataset to store artifacts.
      :jobserver_jar: Portability framework jar filename.
    Returns:
        artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    # Get Flink master URL (flink session cluster) from an ExecutionDTO
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"

    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + flink_session_name + \
                          "-" + str(job_port) + ".log"
    # launch the job server, redirecting its stdout/stderr to the log file
    with open(beam_jobserver_log, "wb") as out, open(beam_jobserver_log,
                                                     "wb") as err:
        # Get the hadoop glob classpath and filter out service-discovery-client, as there is a shading
        # issue with its jackson dependency
        jobserver_cp_list = list(
            filter(
                lambda x: "service-discovery" not in x and x.endswith(".jar"),
                util.get_hadoop_classpath_glob().split(":")))
        jobserver_cp_list.extend((service_discover_jar, jobserver_jar))
        jobserver_cp_path = ":".join(jobserver_cp_list).replace("\n", "")

        jobserver = subprocess.Popen(
            [
                "java", "-cp",
                "%s" % jobserver_cp_path, jobserver_main_class,
                "--artifacts-dir=%s" % hopsfs.project_path() + artifacts_dir,
                "--flink-master-url=%s" % flink_master_url,
                "--artifact-port=%d" % artifact_port,
                "--expansion-port=%d" % expansion_port,
                "--job-host=%s" % job_host,
                "--job-port=%d" % job_port
            ],
            stdout=out,
            stderr=err,
            preexec_fn=util._on_executor_exit('SIGTERM'))
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {
        "jobserver_log": beam_jobserver_log,
        "artifact_port": artifact_port,
        "expansion_port": expansion_port,
        "job_host": job_host,
        "job_port": job_port,
        "jobserver.pid": jobserver.pid
    }
Example #19
    def project_path(self, project=None, exclude_nn_addr=False):
        return hopshdfs.project_path(project=project, exclude_nn_addr=exclude_nn_addr)
Example #20
def start_beam_jobserver(flink_session_name,
                         artifacts_dir="Resources",
                         jobserver_jar=None):
    """
    Start the Java Beam job server that connects to the flink session cluster. User needs to provide the
    job name that started the Flink session and optionally the worker parallelism.

    Args:
      :flink_session_name: Job name that runs the Flink session.
      :artifacts_dir: Default dataset to store artifacts.
      :jobserver_jar: Portability framework jar filename.
    Returns:
        artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    if jobserver_jar is None:
        jobserver_jar = os.path.join(util.get_flink_conf_dir(), "beam-runners-flink-1.9-job-server-2.19.0.jar")
    # Get Flink master URL (flink session cluster) from an ExecutionDTO
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"

    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + flink_session_name + \
                          "-" + str(job_port) + ".log"
    # launch the job server, redirecting its stdout/stderr to the log file
    with open(beam_jobserver_log, "wb") as out, open(beam_jobserver_log, "wb") as err:
        jobserver = subprocess.Popen(["java",
                                       "-jar", jobserver_jar,
                                       "--artifacts-dir=%s" % hopsfs.project_path() + artifacts_dir,
                                       "--flink-master-url=%s" % flink_master_url,
                                       "--artifact-port=%d" % artifact_port,
                                       "--expansion-port=%d" % expansion_port,
                                       "--job-host=%s" % job_host,
                                       "--job-port=%d" % job_port],
                                      stdout=out,
                                      stderr=err,
                                      preexec_fn=util._on_executor_exit('SIGTERM'))
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {"jobserver_log": beam_jobserver_log,
            "artifact_port": artifact_port,
            "expansion_port": expansion_port,
            "job_host": job_host,
            "job_port": job_port,
            "jobserver.pid": jobserver.pid}
Example #21
def keras_mnist():
    import os
    import uuid

    import tensorflow as tf

    from hops import tensorboard

    from hops import model as hops_model
    from hops import hdfs

    batch_size = 32
    num_classes = 10

    # Provide path to train and validation datasets
    train_filenames = tf.io.gfile.glob(
        hdfs.project_path(td_proj_name) + '/' + td_ds + '/' + td +
        '/train/part-r-*')
    validation_filenames = tf.io.gfile.glob(
        hdfs.project_path(td_proj_name) + '/' + td_ds + '/' + td +
        '/validate/part-r-*')

    # Define input function
    def data_input(filenames,
                   batch_size=128,
                   num_classes=10,
                   shuffle=False,
                   repeat=None):
        def parser(serialized_example):
            """Parses a single tf.Example into image and label tensors."""
            features = tf.io.parse_single_example(
                serialized_example,
                features={
                    'image': tf.io.FixedLenFeature([28 * 28], tf.float32),
                    'label': tf.io.FixedLenFeature([], tf.int64),
                })

            image = tf.cast(features['image'], tf.float32)
            label = tf.cast(features['label'], tf.int32)

            # Create a one hot array for your labels
            label = tf.one_hot(label, num_classes)

            return image, label

        # Import MNIST data
        dataset = tf.data.TFRecordDataset(filenames)

        # Map the parser over dataset, and batch results by up to batch_size
        dataset = dataset.map(parser)
        if shuffle:
            dataset = dataset.shuffle(buffer_size=128)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.repeat(repeat)
        return dataset

    # Define a Keras Model.
    model = tf.keras.Sequential()
    model.add(
        tf.keras.layers.Dense(128, activation='relu', input_shape=(784, )))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    # Compile the model.
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(0.001),
                  metrics=['accuracy'])

    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]
    model.fit(data_input(train_filenames, batch_size),
              verbose=0,
              epochs=3,
              steps_per_epoch=5,
              validation_data=data_input(validation_filenames, batch_size),
              validation_steps=1,
              callbacks=callbacks)

    score = model.evaluate(data_input(validation_filenames, batch_size),
                           steps=1)

    # Export model
    # WARNING(break-tutorial-inline-code): The following code snippet is
    # in-lined in tutorials, please update tutorial documents accordingly
    # whenever code changes.

    export_path = os.getcwd() + '/model-' + str(uuid.uuid4())
    print('Exporting trained model to: {}'.format(export_path))

    tf.saved_model.save(model, export_path)

    print('Done exporting!')

    metrics = {'accuracy': score[1]}

    hops_model.export(export_path,
                      model_name,
                      metrics=metrics,
                      project=model_proj_name)

    return metrics
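
A sketch of how the wrapper above could be launched on the cluster, assuming hops' experiment module and that td_proj_name, td_ds, td, model_name and model_proj_name are defined in the enclosing notebook:

# Sketch only: run keras_mnist as a Hopsworks experiment.
from hops import experiment

experiment.launch(keras_mnist, name='keras mnist example')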
Example #22
# Copyright (C) 2020, Logical Clocks AB. All rights reserved
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pyspark.sql import SparkSession
from hops import hdfs

spark = SparkSession.builder.appName("hello_world_app").getOrCreate()
print("hello world")
project_path = hdfs.project_path()
mydf = spark.createDataFrame(["10", "11", "13"], "string").toDF("age")
mydf.write.csv(project_path + '/Resources/mycsv2.csv')
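
As a quick sanity check, the CSV written above could be read back with the same Spark session (a sketch):

# Read the file back from the project's Resources dataset and show a few rows.
readback = spark.read.csv(project_path + '/Resources/mycsv2.csv')
readback.show()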