コード例 #1
0
    def read_desc_file(self, comp_path):
        """
        Read the description (metadata) of the component that resides in 'comp_path'.

        When the directory holds a metadata reference file, the name of the actual
        metadata file is taken from it. Otherwise, the first description yielded by
        'ComponentsDesc.next_comp_desc' for that directory is used.

        :param comp_path: path of the component's directory
        :return: the component's description
        :raises MLCompException: if the reference file cannot be parsed as JSON, or if
            no valid component description was found
        """
        self._logger.debug("Reading component's metadata: {}".format(comp_path))

        ref_filepath = os.path.join(comp_path, ComponentsDesc.COMPONENT_METADATA_REF_FILE)
        if not os.path.isfile(ref_filepath):
            # No reference file. Only one component JSON file is expected, therefore only
            # the first item of the generator (if there is any) is consumed.
            _, comp_desc, _ = next(ComponentsDesc.next_comp_desc(comp_path), (None, None, None))
        else:
            with open(ref_filepath, "r") as ref_file:
                try:
                    ref_content = json.load(ref_file)
                except ValueError:
                    msg = "Failed to load (parse) component metadata's reference file! ref-file: {}" \
                        .format(ref_filepath)
                    self._logger.error(msg)
                    raise MLCompException(msg)

            comp_desc = ComponentsDesc._load_comp_desc(
                comp_path, ref_content[json_fields.COMPONENT_METADATA_REF_FILE_NAME_FIELD])

        if comp_desc:
            return comp_desc

        msg = "Failed to find any valid component's json desc! comp_path: {}".format(comp_path)
        self._logger.error(msg)
        raise MLCompException(msg)
コード例 #2
0
ファイル: py_spark_engine.py プロジェクト: pavantyagi/mlpiper
 def set_dataframe(self, dataframe):
     """
     Register the dataframe (or list of dataframes) of the pipeline.

     :param dataframe: the dataframe object to keep
     :raises MLCompException: if a (truthy) dataframe was already registered
     """
     current = self._dataframe
     if current:
         if type(current) is not list:
             raise MLCompException("DataFrame was already set for the given pipeline! pipeline: {}, "
                                   "existing-columns: {}, new-columns: {}"
                                   .format(self.name(), current.columns, dataframe.columns))

         raise MLCompException("DataFrame list was already set for the given pipeline! pipeline: {}, "
                               .format(self.name()))

     self._dataframe = dataframe
コード例 #3
0
    def _validate_output(self, rdds):
        """
        Validate that a component's output is a list of RDDs.

        An empty / None output is considered valid.

        :param rdds: the output returned by the component
        :raises MLCompException: if the output is not a list, or if one of its
            elements is not an instance of 'RDD'
        """
        if not rdds:
            return

        if not isinstance(rdds, list):
            raise MLCompException(
                "Invalid non-list output! Expecting for a list of RDDs!")

        for rdd in rdds:
            if not isinstance(rdd, RDD):
                # Report the type of the offending element. The original code referred to
                # an undefined name here, which raised a NameError instead.
                raise MLCompException(
                    "Invalid returned list of rdd types! Expecting for 'pyspark.rdd.RDD'! "
                    "name: {}, type: {}".format(self.name(), type(rdd)))
コード例 #4
0
ファイル: dag.py プロジェクト: mlpiper/mlpiper
    def _find_comp_desc(self, pipe_comp):
        """
        Find the description of the component that a pipeline entry refers to.

        :param pipe_comp: a component entry, as read from the pipeline
        :return: the single description whose name equals the entry's type
        :raises MLCompException: if no description, or more than one, matches the type
        """
        comp_type = pipe_comp[json_fields.PIPELINE_COMP_TYPE_FIELD]
        self._logger.debug("Component found in pipeline, id={}, type={}".format(
            pipe_comp[json_fields.PIPELINE_COMP_ID_FIELD], comp_type))

        candidates = []
        for desc in self._comp_desc_list:
            if desc[json_fields.COMPONENT_DESC_NAME_FIELD] == comp_type:
                candidates.append(desc)

        if len(candidates) > 1:
            raise MLCompException("Found more then one component! type=" + comp_type)

        if not candidates:
            raise MLCompException(
                "Could not find a component read from the pipeline! type=[{}] desc=[{}] pipe_comp=[{}]".format(
                    comp_type, self._comp_desc_list, pipe_comp[json_fields.PIPELINE_COMP_ID_FIELD]))

        return candidates[0]
コード例 #5
0
    def _print_acc_messages(self):
        """
        Write every message held in the message container to the logger (INFO level).

        :raises MLCompException: if the logger is not set
        """
        if not self.__logger:
            raise MLCompException("None logger! Invalid internal sequence!")

        # A falsy container is treated as "nothing to print"
        for acc_msg in (self._msg_container or ()):
            self.__logger.info(acc_msg)
コード例 #6
0
ファイル: py_spark_engine.py プロジェクト: vakjha1/mlpiper
 def set_output_model_path(self, path):
     """
     Register the output model path of the pipeline.

     :param path: the output model path to keep
     :raises MLCompException: if a (truthy) output model path was already registered
     """
     if not self._output_model_path:
         self._output_model_path = path
         return

     raise MLCompException(
         "Output model path was already set for the given pipeline! pipeline: {}, "
         "existing-path: {}, new-path: {}".format(self.name(), self._output_model_path, path))
コード例 #7
0
    def monitor(self):
        """
        Poll the job in a loop, reporting its status and online metrics on every iteration.

        The method returns once the job is completed, after the final metrics were
        reported and the completion callback (if any) was called. Between iterations it
        sleeps for 'JobMonitorBase.MONITOR_INTERVAL_SEC'.

        :raises MLCompException: if the job has failed
        """
        self._logger.info("Monitoring job ... {}".format(self._job_name))

        while True:
            response = self._describe_job()
            if self._logger.isEnabledFor(logging.DEBUG):
                self._logger.debug(pprint.pformat(response, indent=4))

            status = self._job_status(response)
            running_time_sec = self._total_running_time_sec(response)
            billing_time_sec = self._billing_time_sec(response)
            Report.job_status(self._job_name, running_time_sec, billing_time_sec, status)

            self._report_online_metrics(response)

            if status == SMApiConstants.JOB_COMPLETED:
                self._report_final_metrics(response)
                self._logger.info("Job '{}' completed!".format(self._job_name))
                if self._on_complete_callback:
                    self._on_complete_callback(response)
                return

            if status == SMApiConstants.JOB_FAILED:
                failure_msg = "Job '{}' failed! message: {}" \
                    .format(self._job_name, response[SMApiConstants.FAILURE_REASON])
                self._logger.error(failure_msg)
                raise MLCompException(failure_msg)

            if status != SMApiConstants.JOB_IN_PROGRESS:
                # Any other status is only reported; the polling continues
                self._logger.warning("Unexpected job status! job-name: {}, status: {}"
                                     .format(self._job_name, status))

            self._logger.info("Job '{}' is still running ... {} sec"
                              .format(self._job_name, running_time_sec))
            time.sleep(JobMonitorBase.MONITOR_INTERVAL_SEC)
コード例 #8
0
    def load(self, extended=True):
        """
        Load the descriptions of all the components that are used by the pipeline.

        Each component's directory is also inserted at the head of 'sys.path', unless
        it is already there.

        :param extended: when True, the component's root path is added to its description
        :return: a list of the loaded components' descriptions
        :raises MLCompException: if the components' root path is not set and it cannot
            be resolved from the components' package
        """
        if not self._comp_root_path:
            try:
                # The following call to 'pkg_resources.resource_filename' actually extract all the files
                # from the component's egg file from 'parallelm.code_components' folder
                self._comp_root_path = pkg_resources.resource_filename(
                    ComponentsDesc.CODE_COMPONETS_MODULE_NAME, '')
                self._logger.info("Cached components are at: {}".format(self._comp_root_path))
            except ModuleNotFoundError:
                msg = "Either component's root path or component's egg file are missing!"
                self._logger.error(msg)
                raise MLCompException(msg)

        loaded_descs = []
        for comp_type in self._get_next_comp_type_in_pipeline():
            self._logger.info("Handling {}".format(comp_type))

            comp_path = os.path.join(self._comp_root_path, comp_type)
            if comp_path not in sys.path:
                sys.path.insert(0, comp_path)

            desc = self.read_desc_file(comp_path)
            if extended:
                desc[json_fields.COMPONENT_DESC_ROOT_PATH_FIELD] = comp_path
            self._add_default_values(desc)

            self._logger.debug("Component loaded: " + str(desc))
            loaded_descs.append(desc)

        return loaded_descs
コード例 #9
0
ファイル: dag.py プロジェクト: mlpiper/mlpiper
    def run_connected_pipeline(self, system_conf, ee_conf, engine_info):
        """
        Run the pipeline's components according to the sorted execution graph.

        All the components are configured first. Then each component is run with the
        data objects of its parents, and its output is recorded for its children.
        The ML engine is finalized at the end.

        :param system_conf: system configuration, passed to the nodes' input arguments
        :param ee_conf: execution environment configuration, passed to the nodes' input arguments
        :param engine_info: not used by this method
        :raises MLCompException: if a component returns a non-empty output that is not a list
        """
        print("Running pipeline")
        self._logger.debug("Running connected pipeline")

        # Components configuration phase
        for node in self._sorted_execution_graph_list:
            node.component_runner.configure(node.input_arguments(system_conf, ee_conf))

        # Components materialize phase
        for node in self._sorted_execution_graph_list:
            inputs = self.parent_data_objs(node)
            self._logger.debug("Calling dag node '{}', with args: {}".format(node.comp_name(), inputs))

            self._component_run_header(node)
            started_at = time.time()

            # Flush the standard streams around the component's run
            sys.stderr.flush()
            sys.stdout.flush()
            outputs = node.component_runner.run(inputs)
            sys.stderr.flush()
            sys.stdout.flush()

            elapsed_sec = time.time() - started_at
            if outputs and not (type(outputs) is list):
                raise MLCompException("Invalid returned data type from component! It should be a list! "
                                      "name: " + node.comp_name())

            self._component_run_footer(node, outputs, elapsed_sec)

            self._logger.debug("Output of dag node '{}' is: {}".format(node.comp_name(), outputs))
            self.update_parent_data_objs(node, outputs)

        self._ml_engine.finalize()
コード例 #10
0
    def _setup_py4j_client_connection(self):
        """
        Open a py4j gateway to the java side and obtain a reference to the java component.

        This object is registered as the python server entry point of the gateway. After
        the java component is referenced, its environment attributes are set.

        :raises MLCompException: if the java component reference is empty
        """
        self._gateway = JavaGateway(
            gateway_parameters=GatewayParameters(port=self._java_port,
                                                 auto_field=True,
                                                 auto_close=True,
                                                 eager_load=True),
            callback_server_parameters=CallbackServerParameters(port=0,
                                                                daemonize=True,
                                                                daemonize_connections=True,
                                                                eager_load=True),
            python_server_entry_point=self)

        self._component_via_py4j = self._gateway.entry_point.getComponent()
        if not self._component_via_py4j:
            raise MLCompException("None reference of py4j java object!")

        if self._verbose:
            ref_msg = "Py4J component referenced successfully! comp_via_py4j: {}".format(
                self._component_via_py4j)
            self._logger.debug(self._prefix_msg + ref_msg)

        self._component_via_py4j.setEnvAttributes(self.get_wid(), self._verbose)
コード例 #11
0
    def _monitor_uwsgi_proc(self, stop_msg=None):
        """
        Forward the output of the 'uwsgi' process and (optionally) report statistics.

        The stdout and stderr pipes are read in a loop. Complete lines from stdout are
        printed and complete lines from stderr are written to file descriptor 2.

        :param stop_msg: when given, statistics are not reported and the loop ends once
            this message is found in a block that was read from stdout. When not given,
            statistics are reported periodically and the loop ends only when the process
            exits.
        :raises MLCompException: if the process exited with a non-zero return code. The
            '_cleanup' method is called before the exception is re-raised.
        """
        try:
            # Statistics are reported only when there is no stop message to wait for
            monitor_stats = not stop_msg
            block_size = 2048

            stderr_fd = 2

            stdout_buff2lines = BufferToLines()
            stderr_buff2lines = BufferToLines()

            keep_reading = True
            last_stats_read = time.time()
            while keep_reading:

                read_fs = [self._stdout_pipe_r, self._stderr_pipe_r]

                # Wait only for the time that is left until the next statistics report
                if monitor_stats:
                    sleep_time = self._stats_reporting_interval_sec - (time.time() - last_stats_read)
                    if sleep_time < 0:
                        sleep_time = 0
                else:
                    sleep_time = self._stats_reporting_interval_sec

                # Block until one of the pipes is readable or the timeout expires
                readable_fd = select.select(read_fs, [], [], sleep_time)[0]

                if monitor_stats:
                    wakeup_time = time.time()
                    if wakeup_time - last_stats_read > self._stats_reporting_interval_sec:
                        last_stats_read = wakeup_time
                        if self._stats:
                            self._stats.report()

                if readable_fd:
                    # NOTE(review): an empty read (end-of-file on a pipe) is not handled
                    # explicitly here, and the process is polled only when nothing is
                    # readable - confirm that this cannot turn into a busy loop.
                    for pipe in readable_fd:
                        if pipe is self._stdout_pipe_r:
                            buff = os.read(pipe, block_size)
                            stdout_buff2lines.add(buff)
                            for line in stdout_buff2lines.lines():
                                print(line)

                            # The stop message is searched in the raw block that was just read
                            if stop_msg and stop_msg.encode() in buff:
                                keep_reading = False

                        if pipe is self._stderr_pipe_r:
                            buff = os.read(pipe, block_size)
                            stderr_buff2lines.add(buff)
                            for line in stderr_buff2lines.lines():
                                os.write(stderr_fd, (line + '\n').encode())
                else:
                    # Nothing to read within the timeout: check whether the process has exited
                    rc = self._proc.poll()
                    if rc is not None:
                        if rc != 0:
                            raise MLCompException("Error in 'uwsgi' server! rc: {}".format(rc))
                        break

        except MLCompException as e:
            self._cleanup()
            raise e
コード例 #12
0
ファイル: metric.py プロジェクト: vakjha1/mlpiper
    def add_related_metric(self, bar_graph_metric):
        """
        Add a related metric to this bar graph metric.

        :param bar_graph_metric: a tuple of two elements: the related 'Metric' and its
            bar column label (a string)
        :raises MLCompException: if this metric's relation is not a bar graph, or if the
            argument is not a (Metric, string) tuple
        """
        if self.metric_relation != MetricRelation.BAR_GRAPH:
            raise MLCompException("Related metric can be added only to bar graph!")

        if not isinstance(bar_graph_metric, tuple) or len(bar_graph_metric) != 2:
            # Note the space at the end of the first literal; adjacent literals are
            # concatenated as is.
            raise MLCompException("Related metric information should be a tuple of the metric itself and a "
                                  "bar column label! related_metric: {}".format(bar_graph_metric))

        related, bar_label = bar_graph_metric

        if not isinstance(related, Metric):
            raise MLCompException("First element in related bar graph metric should be a Metric! "
                                  "provided: {}".format(related))

        if not isinstance(bar_label, six.string_types):
            raise MLCompException("Second element in related bar graph metric should be a string! "
                                  "provided: {}".format(bar_label))

        self._related_metric.append(bar_graph_metric)
コード例 #13
0
ファイル: topological_sort.py プロジェクト: vakjha1/mlpiper
 def _call_class_attr(cls, attr_name):
     attr = getattr(cls, attr_name, None)
     if not attr:
         raise MLCompException(
             "The given class does not include the given attribute name! " +
             "class: {}, attr_name: {}".format(cls, attr_name))
     attr_value = attr() if callable(attr) else attr
     return attr_value
コード例 #14
0
ファイル: uwsgi_monitor.py プロジェクト: theromis/mlpiper
    def start(self):
        """
        Start the monitoring thread, which runs the '_run' method.

        The thread object is kept in the monitor info under the monitor thread key.

        :raises MLCompException: if the uWSGI process was not set
        """
        if not self._proc:
            raise MLCompException("uWSGI process was not setup for monitoring!")

        monitor_thread = threading.Thread(target=self._run)
        self._monitor_info[UwsgiConstants.MONITOR_THREAD_KEY] = monitor_thread
        monitor_thread.start()
コード例 #15
0
ファイル: flask_app_wrapper.py プロジェクト: vakjha1/mlpiper
    def __init__(self, handler, raw):
        """
        Keep a REST endpoint handler together with its 'raw' setting.

        :param handler: the endpoint handler; it must be a bound method
        :param raw: kept as is for later use
        :raises MLCompException: if the handler is not a method
        """
        if inspect.ismethod(handler):
            self._handler = handler
            self._raw = raw
        else:
            raise MLCompException(
                "Invalid REST endpoint handler! Should be a component's method with the "
                "following prototype: <handler>(self, url_params, form_params), given: {}"
                .format(handler))
コード例 #16
0
    def _materialize(self, parent_data_objs, user_data):
        """
        Download a gzipped, pickled dataset and optionally save its parts as CSV files.

        The loaded object is expected to hold up to three data sets (train, valid and
        test). Sets that are not included are returned as None. The temporary download
        file is always removed at the end.

        :param parent_data_objs: not used by this method
        :param user_data: not used by this method
        :return: a list of [train_set, valid_set, test_set]
        :raises MLCompException: on any failure to download, load or save the dataset
        """

        def _num_samples(data_set):
            # Number of samples in the first element of a data set, or None if it is missing
            return len(data_set[0]) if data_set else None

        self._init_params()

        tmp_dataset_filepath = os_util.tmp_filepath()
        self._logger.info(
            "Temporary dataset file path: {}".format(tmp_dataset_filepath))

        try:
            urllib.request.urlretrieve(
                self._dataset_url,
                tmp_dataset_filepath,
                reporthook=DatasetDownloader._download_report_hook)
            self._logger.info("Dataset download completed ... 100%")

            train_set, valid_set, test_set = (None, None, None)
            with gzip.open(tmp_dataset_filepath, 'rb') as f:
                # SECURITY NOTE: unpickling may execute arbitrary code. The dataset url
                # must point to a trusted source only.
                loaded_artifacts = pickle.load(f, encoding='latin1')
                # Try to unpack three sets, then two, and finally fall back to one
                try:
                    train_set, valid_set, test_set = loaded_artifacts
                except ValueError:
                    try:
                        train_set, valid_set = loaded_artifacts
                    except ValueError:
                        train_set = loaded_artifacts

            # A single format call is used, so that a missing set affects only its own
            # counter and not the whole message.
            self._logger.info(
                "Dataset downloaded and loaded! "
                "#samples in train set: {}, #samples in valid set: {}, #samples in test set: {}"
                .format(_num_samples(train_set), _num_samples(valid_set), _num_samples(test_set)))

            if self._train_set_local_csv_filepath and train_set:
                self._save_to_csv(train_set[0],
                                  self._train_set_local_csv_filepath)

            if self._valid_set_local_csv_filepath and valid_set:
                self._save_to_csv(valid_set[0],
                                  self._valid_set_local_csv_filepath)

            if self._test_set_local_csv_filepath and test_set:
                self._save_to_csv(test_set[0],
                                  self._test_set_local_csv_filepath)

            return [train_set, valid_set, test_set]
        except Exception as e:
            msg = "Failed to download and read dataset!\n{}".format(e)
            self._logger.error(msg)
            raise MLCompException(msg)
        finally:
            self._logger.info(
                "Cleaning up temporary dataset file path: {}".format(
                    tmp_dataset_filepath))
            os_util.remove_file_safely(tmp_dataset_filepath)
コード例 #17
0
    def _read_execution_env_params(self):
        """
        Extract the engine arguments from the pipeline's execution environment section.

        :return: the value of the 'arguments' key in the engine configuration
        :raises MLCompException: if the execution environment configs or the engine
            configuration are missing, or if the engine type is not 'SageMakerEngine.TYPE'
        """
        ee_section = self._pipeline.get('executionEnvironment', dict())
        ee_config = ee_section.get('configs')
        if not ee_config:
            raise MLCompException(
                "Missing execution environment section in pipeline json!")

        eng_config = ee_config.get('engConfig')
        if not eng_config:
            raise MLCompException(
                "Missing execution environment engine section in pipeline json!")

        if eng_config['type'] == SageMakerEngine.TYPE:
            return eng_config['arguments']

        raise MLCompException(
            "Unexpected engine type in execution environment! expected: '{}', got: {}"
            .format(SageMakerEngine.TYPE, eng_config['type']))
コード例 #18
0
ファイル: nginx_broker.py プロジェクト: theromis/mlpiper
    def _run(self, shared_conf):
        """
        Start the 'nginx' service by running 'NginxConstants.START_CMD' in a shell.

        Nothing is executed in dry run mode.

        :param shared_conf: not used by this method
        :raises MLCompException: if the start command exits with a non-zero return code
        """
        self._logger.info("Starting 'nginx' service ... cmd: '{}'".format(NginxConstants.START_CMD))
        if self._dry_run:
            return

        # 'check_call' never returns a non-zero code. It raises 'CalledProcessError'
        # instead, so the failure is detected by catching that exception.
        try:
            subprocess.check_call(NginxConstants.START_CMD, shell=True)
        except subprocess.CalledProcessError:
            raise MLCompException("nginx service failed to start! It is suspected as not being installed!")

        self._logger.info("'nginx' service started successfully!")
コード例 #19
0
ファイル: nginx_broker.py プロジェクト: pavantyagi/mlpiper
    def _server_conf_filepath(self, platform_name):
        """
        Return the path of the nginx server configuration file for the given platform.

        On macOS, the server configuration directory is created if it does not exist.

        :param platform_name: the name of the platform
        :return: the full path of the server configuration file
        :raises MLCompException: if the platform is not supported, or if on macOS the
            nginx root directory does not exist
        """
        if self._debian_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_DEBIAN
        elif self._redhat_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_REDHAT
        elif self._macos_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_MACOS
            if not os.path.isdir(conf_dir):
                # The directory can be created only under an existing nginx root
                if not os.path.isdir(NginxConstants.NGINX_ROOT_MACOS):
                    raise MLCompException(
                        "'{}' does not exist or not a directory. Is nginx installed?"
                        .format(NginxConstants.NGINX_ROOT_MACOS))
                os.mkdir(conf_dir)
        else:
            raise MLCompException(
                "Nginx cannot be configured! Platform is not supported: {}".
                format(platform_name))

        return os.path.join(conf_dir, NginxConstants.SERVER_CONF_FILENAME)
コード例 #20
0
ファイル: nginx_broker.py プロジェクト: theromis/mlpiper
    def _server_conf_filepath(self, platform_name):
        """
        Return the path of the nginx server configuration file for the given platform.

        :param platform_name: the name of the platform
        :return: the full path of the server configuration file
        :raises MLCompException: if the platform is not supported
        """
        if self._debian_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_DEBIAN
        elif self._redhat_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_REDHAT
        elif self._macos_platform(platform_name):
            conf_dir = NginxConstants.SERVER_CONF_DIR_MACOS
        else:
            unsupported_msg = "Nginx cannot be configured! Platform is not supported: {}".format(platform_name)
            raise MLCompException(unsupported_msg)

        conf_filepath = os.path.join(conf_dir, NginxConstants.SERVER_CONF_FILENAME)
        return conf_filepath
コード例 #21
0
    def _materialize(self, parent_data_objs, user_data):
        """
        Run the training flow: init params, convert and upload the data, train, monitor
        the job and download the model.

        :param parent_data_objs: a list of exactly 3 inputs from the parent components
        :param user_data: not used by this method
        :raises MLCompException: if the number of parent inputs is not 3
        """

        if not parent_data_objs or len(parent_data_objs) != 3:
            # 'parent_data_objs' may be None, so its length cannot be taken unconditionally
            num_inputs = len(parent_data_objs) if parent_data_objs else 0
            raise MLCompException("Expecting 3 parent inputs! got: {}, parent_data: {}"
                                  .format(num_inputs, parent_data_objs))

        self._init_params(parent_data_objs)
        self._convert_and_upload()
        self._do_training()
        self._monitor_job()
        self._download_model()
コード例 #22
0
    def _materialize(self, parent_data_objs, user_data):
        """
        Download a file from AWS S3 to the local path given by the 'local_filepath' param.

        :param parent_data_objs: a list with a single element, the s3 url of the file.
            When the url is empty, nothing is downloaded.
        :param user_data: not used by this method
        :raises MLCompException: if the input is missing or does not hold exactly one element
        """

        # A None input is reported as a missing input, rather than failing on 'len'
        if not parent_data_objs or len(parent_data_objs) != 1:
            raise MLCompException("Missing a mandatory s3 url for a file as input!")

        s3_url = parent_data_objs[0]
        if s3_url:
            local_filepath = self._params['local_filepath']
            AwsHelper(self._logger).download_file(s3_url, local_filepath)
        else:
            self._logger.info("Nothing to download from AWS S3!")
コード例 #23
0
    def __init__(self, mlops, ml_engine, polling_interval_sec=10.0):
        """
        Initialize the actor's logger, polling interval and synchronization state.

        :param mlops: the mlops object; its 'init_called' attribute must be truthy
        :param ml_engine: the engine that provides the logger
        :param polling_interval_sec: kept as the actor's polling interval, in seconds
        :raises MLCompException: if 'mlops' is empty or was not initialized
        """
        super(BgActor, self).__init__()
        self.set_logger(ml_engine.get_engine_logger(self.logger_name()))

        if not (mlops and mlops.init_called):
            raise MLCompException("'mlops' was not setup properly!")

        self._mlops = mlops
        self._polling_interval_sec = polling_interval_sec

        self._stop_gracefully = False
        self._condition = threading.Condition()
コード例 #24
0
    def _load_comp_desc(root, filename):
        if filename.endswith(".json"):
            comp_json = os.path.join(root, filename)
            with open(comp_json) as f:
                try:
                    comp_desc = json.load(f)
                except ValueError as ex:
                    raise MLCompException("Found json file with invalid json format! filename: {}, exception: {}".format(comp_json, str(ex)))

            if ComponentsDesc.is_valid(comp_desc):
                return comp_desc
        return None
コード例 #25
0
    def _setup_env(self, eng_args_config):
        """
        Export the AWS region and credentials, taken from the engine arguments, as
        environment variables.

        :param eng_args_config: the engine arguments configuration (dict like)
        :raises MLCompException: if the access key id or the secret access key is empty
        """
        region = EeArg(eng_args_config.get('region')).value

        access_key_id = EeArg(eng_args_config.get('aws_access_key_id')).value
        if not access_key_id:
            raise MLCompException(
                "Empty 'aws_access_key_id' parameter in execution environment!")

        secret_access_key = EeArg(eng_args_config.get('aws_secret_access_key')).value
        if not secret_access_key:
            raise MLCompException(
                "Missing 'aws_secret_access_key' parameter in execution environment!")

        os.environ[SageMakerEngine.AWS_DEFAULT_REGION] = region
        os.environ[SageMakerEngine.AWS_ACCESS_KEY_ID] = access_key_id
        os.environ[SageMakerEngine.AWS_SECRET_ACCESS_KEY] = secret_access_key
コード例 #26
0
    def _init_ml_engine(self, pipeline):
        """
        Create the ML engine that matches the pipeline's engine type.

        After the engine is created, this object's logger is replaced by the engine's
        logger. If the mlops library is loaded, it is initialized and the engine's
        'run' method is called with it.

        :param pipeline: the loaded pipeline
        :raises MLCompException: if the engine type is not supported
        """
        engine_type = pipeline[json_fields.PIPELINE_ENGINE_TYPE_FIELD]
        self._logger.info("Engine type: {}".format(engine_type))

        # Only the PySpark engine passes its context to the mlops initialization
        init_mlops_with_context = False

        # The engines' modules are imported lazily, only for the selected engine
        if engine_type == EngineType.PY_SPARK:
            from parallelm.ml_engine.py_spark_engine import PySparkEngine

            self._ml_engine = PySparkEngine(pipeline[json_fields.PIPELINE_NAME_FIELD],
                                            self._run_locally,
                                            self._spark_jars)
            init_mlops_with_context = True

        elif engine_type == EngineType.GENERIC:
            from parallelm.ml_engine.python_engine import PythonEngine

            self._logger.info("Using python engine")
            self._ml_engine = PythonEngine(pipeline[json_fields.PIPELINE_NAME_FIELD],
                                           self._mlcomp_jar)

        elif engine_type == EngineType.REST_MODEL_SERVING:
            from parallelm.ml_engine.rest_model_serving_engine import RestModelServingEngine

            self._logger.info("Using REST Model Serving engine")
            self._ml_engine = RestModelServingEngine(pipeline[json_fields.PIPELINE_NAME_FIELD],
                                                     self._mlcomp_jar,
                                                     self._standalone)

        else:
            raise MLCompException(
                "Engine type is not supported by the Python execution engine! engineType: "
                + engine_type)

        self.set_logger(self._ml_engine.get_engine_logger(self.logger_name()))

        if mlops_loaded:
            if init_mlops_with_context:
                mlops.init(self._ml_engine.context)
            else:
                # This initialization applies only to Python components and not to components
                # that are written in other languages (.e.g R). The reason for that is that
                # those components are executed within different process and thus need to
                # load and init the mlops library separately.
                mlops.init()

        if mlops_loaded:
            self._ml_engine.run(mlops, pipeline)
コード例 #27
0
ファイル: metric.py プロジェクト: vakjha1/mlpiper
    def __init__(self, name, title=None, hidden=False, metric_type=MetricType.COUNTER, value_type=int,
                 metric_relation=None, related_metric=None):
        """
        Define a new metric and register it in the 'Metric._metrics' registry.

        :param name: the metric's name. 'Metric.NAME_SUFFIX' is appended to it.
        :param title: the metric's title. Mandatory for metrics that are not hidden.
        :param hidden: whether the metric is hidden
        :param metric_type: the type of the metric
        :param value_type: the type of the metric's value
        :param metric_relation: the relation of this metric to its related metric(s)
        :param related_metric: for a bar graph relation, a list of (Metric, bar label)
            tuples. Otherwise, a single related metric or a list of related metrics.
        :raises MLCompException: on any invalid combination of the arguments
        """
        super(Metric, self).__init__(logging.getLogger(self.logger_name()))

        self._metric_name = name + Metric.NAME_SUFFIX
        self._title = title
        self._hidden = hidden
        self._metric_type = metric_type
        self._value_type = value_type
        self._metric_relation = metric_relation
        self._metric_already_displayed = False

        if not self._hidden and not self._title:
            raise MLCompException("A metric can be seen in the UI only if 'title' is provided! name: {}"
                                  .format(name))

        if self.metric_relation == MetricRelation.BAR_GRAPH:
            if not isinstance(related_metric, list):
                # The arguments are used in the message, because the related metric
                # attribute is not assigned yet at this point.
                raise MLCompException("Bar graph metric should be provided with a list of metrics tuples. "
                                      "Each tuple should contain the related metric and its bar name! "
                                      "name: {}, related_metrics: {}".format(name, related_metric))

            self._related_metric = []
            for m in related_metric:
                self.add_related_metric(m)
        else:
            self._related_metric = related_metric if isinstance(related_metric, list) else [related_metric]

            first_related = self._related_metric[0]
            if first_related and first_related.metric_type != metric_type:
                raise MLCompException("Error in metrics relation! Given metric cannot relate to other metric of "
                                      "different type!" + " mentric: {}, type: {}, related-metric: {}, type: {}"
                                      .format(name, metric_type, first_related.metric_name,
                                              first_related.metric_type))

        # NOTE(review): the registry is keyed by the suffixed metric name, while this check
        # uses the given name - confirm whether duplicated definitions should be detected
        # by 'self._metric_name' instead.
        if name in Metric._metrics:
            raise MLCompException("Metric has already been defined! name: {}".format(name))

        self._logger.info("Add new uwsgi metric ... {}".format(self._metric_name))
        Metric._metrics[self._metric_name] = self
コード例 #28
0
    def _materialize(self, parent_data_objs, user_data):
        """
        Upload the model, create it, run a transformation job on it and monitor the job.

        :param parent_data_objs: the inputs from the parent components; must not be empty
        :param user_data: not used by this method
        :return: a list with the predictions' S3 url, or None if '_init_params' returned
            a falsy value
        :raises MLCompException: if there is no parent input
        """
        if not parent_data_objs:
            raise MLCompException(
                "Missing expected dataset S3 url from parent input!")

        if self._init_params(parent_data_objs):
            self._upload_model_to_s3()
            self._create_model()
            self._create_transformation_job()
            self._monitor_job()
            return [self._predictions_s3_url()]

        return None
コード例 #29
0
ファイル: topological_sort.py プロジェクト: vakjha1/mlpiper
    def _visit(self, t_node):
        """
        Visit a node and all of its children recursively (depth first), appending each
        node to the sorted graph after all of its children were visited.

        :param t_node: the auxiliary node to visit
        :raises MLCompException: if a cyclic loop is detected, or if a child key does not
            exist in the graph
        """
        self._logger.debug("Visiting node: {}".format(t_node.key))
        if t_node.perm_visit:
            return

        if t_node.temp_visit:
            # The node is reached again while its own visit is still in progress. The
            # node's key is reported; the original code referred to an undefined name.
            raise MLCompException(
                "The pipeline has invalid cyclic loop (Not a DAG)! pipe-node-id: {}"
                .format(t_node.key))

        t_node.temp_visit = True
        for child_key in t_node.child_keys:
            if child_key not in self._graph_aux:
                raise MLCompException(
                    "Child id was not found in the graph! key: {}".format(
                        child_key))

            self._visit(self._graph_aux[child_key])

        t_node.temp_visit = False
        t_node.perm_visit = True
        self._sorted_graph.append(t_node.node)
コード例 #30
0
    def _load_pipeline(self):
        """
        Load the pipeline from its JSON string or from its file object, and cache it.

        An already loaded pipeline is returned as is. The JSON string takes precedence
        over the file.

        :return: the loaded pipeline
        :raises MLCompException: if neither a JSON string nor a file is available, or if
            the pipeline does not include the components' field
        """
        if self._pipeline:
            return self._pipeline

        if self._json_pipeline:
            self._pipeline = json.loads(self._json_pipeline)
        elif self._pipeline_file:
            self._pipeline = json.load(self._pipeline_file)
        else:
            raise MLCompException("Missing pipeline file!")

        # Validations
        if json_fields.PIPELINE_PIPE_FIELD not in self._pipeline:
            raise MLCompException("Pipeline does not contain any component! pipeline=" + str(self._pipeline))

        # Passwords can be masked only when the mlops library is available
        pipeline_str = str(self._pipeline)
        if mlops_loaded:
            pipeline_str = mask_passwords(pipeline_str)
        self._logger.debug("Pipeline: " + pipeline_str)

        return self._pipeline