Example 1
    def __init__(self,
                 name: Text,
                 schema: Optional[Dict] = None,
                 _id: Optional[Text] = None,
                 *args, **kwargs):
        """
        Construct the datasource

        Args:
            name (str): name of datasource
            schema (dict): schema of datasource
            _id: unique ID (for internal use)
        """
        if _id:
            # It's loaded from an existing config.
            self._id = _id
            logger.debug(f'Datasource {name} loaded.')
        else:
            # If _id is None, this is assumed to be a new datasource;
            # check for duplicates.
            all_names = Repository.get_instance().get_datasource_names()
            if name in all_names:
                raise AlreadyExistsException(
                    name=name,
                    resource_type='datasource')
            self._id = str(uuid4())
            track(event=CREATE_DATASOURCE)
            logger.info(f'Datasource {name} created.')

        self.name = name
        self.schema = schema
        self._immutable = False
        self._source = source_utils.resolve_source_path(
            self.__class__.__module__ + '.' + self.__class__.__name__
        )
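A minimal usage sketch of the two construction paths above, assuming a concrete subclass exists; `CSVDatasource` and its arguments are illustrative, not part of the snippet itself:

    # Hypothetical subclass for illustration only.
    class CSVDatasource(BaseDatasource):
        pass

    # New datasource: no _id, so a fresh UUID is generated and the
    # repository is checked for a duplicate name.
    ds = CSVDatasource(name='my_csv_data', schema={'age': 'int'})

    # Loaded from config: _id is passed through, skipping the duplicate
    # check and the creation tracking event.
    ds_loaded = CSVDatasource(name='my_csv_data', _id=ds._id)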
Example 2
    def __init__(self, **kwargs):
        # kwargs is already an empty dict when no keyword arguments are
        # given, so a single assignment covers both cases.
        self._kwargs = kwargs

        self._source = source_utils.resolve_source_path(
            self.__class__.__module__ + '.' + self.__class__.__name__)
        self._immutable = False
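A sketch of how such a constructor captures arbitrary keyword arguments; `MyBackend` and its parent class are hypothetical stand-ins for whatever class this `__init__` belongs to:

    class MyBackend(BaseBackend):  # hypothetical subclass
        pass

    backend = MyBackend(project='demo', region='europe-west1')
    # The keyword arguments are preserved verbatim on the instance.
    assert backend._kwargs == {'project': 'demo', 'region': 'europe-west1'}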
Example 3
    def __init__(self, **kwargs):
        """
        Base data step constructor.

        Args:
            **kwargs: Keyword arguments used in the construction of a step
                from a configuration file.
        """

        self._kwargs = kwargs
        self._immutable = False
        self._source = resolve_source_path(self.__class__.__module__ + '.' +
                                           self.__class__.__name__)
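Forwarding explicit arguments through `**kwargs` is what lets the base class record them; a sketch under that assumption, where `SplitStep` and `split_ratio` are hypothetical:

    class SplitStep(BaseStep):  # hypothetical step
        def __init__(self, split_ratio: float = 0.8, **kwargs):
            # Pass explicit arguments up so they land in self._kwargs and
            # can be written to / rebuilt from a configuration file.
            super().__init__(split_ratio=split_ratio, **kwargs)

    step = SplitStep(split_ratio=0.7)
    assert step._kwargs == {'split_ratio': 0.7}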
Example 4
    def get_config(self):
        return {
            "cortex_serving_args": {
                "env": self.env,
                "api_spec": self.api_spec,
                "predictor_path": resolve_source_path(
                    self.predictor.__module__ + '.' +
                    self.predictor.__name__),
                "requirements": self.requirements,
                "conda_packages": self.conda_packages,
                "project_dir": self.project_dir,
                "force": self.force,
                "wait": self.wait,
            }
        }
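A sketch of consuming such a config dict, e.g. dumping it for inspection; `deployer` stands in for an instance of the class defining `get_config`, and the use of `json` here is illustrative, not prescribed by the snippet above:

    import json

    config = deployer.get_config()
    # default=str guards against values that are not natively
    # JSON-serializable.
    print(json.dumps(config['cortex_serving_args'], indent=2, default=str))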
Example 5
    def __init__(self,
                 name: Optional[Text] = None,
                 enable_cache: Optional[bool] = True,
                 steps_dict: Optional[Dict[Text, BaseStep]] = None,
                 backend: Optional[OrchestratorBaseBackend] = None,
                 metadata_store: Optional[ZenMLMetadataStore] = None,
                 artifact_store: Optional[ArtifactStore] = None,
                 datasource: Optional[BaseDatasource] = None,
                 pipeline_name: Optional[Text] = None,
                 *args,
                 **kwargs):
        """
        Construct a base pipeline. This is a base interface that is meant
        to be overridden in multiple other pipeline use cases.

        Args:
            name: Outward-facing name of the pipeline.
            pipeline_name: A unique name that identifies the pipeline after
             it is run.
            enable_cache: Boolean, indicates whether or not caching
             should be used.
            steps_dict: Optional dict of steps.
            backend: Orchestrator backend.
            metadata_store: Configured metadata store. If None,
             the default metadata store is used.
            artifact_store: Configured artifact store. If None,
             the default artifact store is used.
        """
        # Generate a name if not given
        if name is None:
            name = str(round(time.time() * 1000))
        self.name = name
        self._immutable = False

        # Metadata store
        if metadata_store:
            self.metadata_store: ZenMLMetadataStore = metadata_store
        else:
            # use default
            self.metadata_store: ZenMLMetadataStore = \
                Repository.get_instance().get_default_metadata_store()

        if pipeline_name:
            # The pipeline was loaded from a YAML config; reuse its name.
            self.pipeline_name = pipeline_name
            self.file_name = self.pipeline_name + '.yaml'
        else:
            # If pipeline_name is None, this is a new pipeline.
            self.pipeline_name = self.create_pipeline_name_from_name()
            self.file_name = self.pipeline_name + '.yaml'
            # Check for duplicates, since this is a 'new' pipeline.
            self._check_registered()
            track(event=CREATE_PIPELINE)
            logger.info(f'Pipeline {name} created.')

        self.enable_cache = enable_cache

        if steps_dict is None:
            self.steps_dict: Dict[Text, BaseStep] = {}
        else:
            self.steps_dict = steps_dict

        # Default to local
        if backend is None:
            self.backend = OrchestratorBaseBackend()
        else:
            self.backend = backend

        # Artifact store
        if artifact_store:
            self.artifact_store = artifact_store
        else:
            # use default
            self.artifact_store = \
                Repository.get_instance().get_default_artifact_store()

        # Datasource (may legitimately be None)
        self.datasource = datasource

        self._source = source_utils.resolve_source_path(
            self.__class__.__module__ + '.' + self.__class__.__name__)
        self._kwargs = {
            keys.PipelineDetailKeys.NAME: self.pipeline_name,
            keys.PipelineDetailKeys.ENABLE_CACHE: self.enable_cache,
        }
        if kwargs:
            self._kwargs.update(kwargs)
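A construction sketch; `TrainingPipeline` is a hypothetical concrete subclass, and only the keyword arguments mirror the signature above:

    pipeline = TrainingPipeline(
        name='my-pipeline',
        enable_cache=False,
    )
    # Omitted arguments fall back to defaults: a local
    # OrchestratorBaseBackend and the repository's default metadata and
    # artifact stores.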