def __init__(self,
             name: Text,
             schema: Optional[Dict] = None,
             _id: Optional[Text] = None,
             *args,
             **kwargs):
    """
    Construct the datasource.

    Args:
        name (str): Name of the datasource.
        schema (dict): Optional schema of the datasource.
        _id: Unique ID (for internal use). If given, the datasource is
            assumed to already exist in the repository config and no
            duplicate-name check is performed.

    Raises:
        AlreadyExistsException: If a new datasource (no `_id`) has the
            same name as one already registered in the repository.
    """
    if _id:
        # Loaded from config: reuse the persisted ID, no registration.
        self._id = _id
        logger.debug(f'Datasource {name} loaded.')
    else:
        # New datasource: guard against duplicate names before minting
        # a fresh ID and tracking the creation event.
        all_names = Repository.get_instance().get_datasource_names()
        if name in all_names:
            raise AlreadyExistsException(
                name=name, resource_type='datasource')
        self._id = str(uuid4())
        track(event=CREATE_DATASOURCE)
        logger.info(f'Datasource {name} created.')

    self.name = name
    self.schema = schema
    self._immutable = False
    # Fully-qualified import path of the concrete subclass, used to
    # re-resolve this datasource from serialized config.
    self._source = source_utils.resolve_source_path(
        self.__class__.__module__ + '.' + self.__class__.__name__
    )
def __init__(self,
             name: Text,
             enable_cache: Optional[bool] = True,
             steps_dict: Dict[Text, BaseStep] = None,
             backends_dict: Dict[Text, BaseBackend] = None,
             metadata_store: Optional[ZenMLMetadataStore] = None,
             artifact_store: Optional[ArtifactStore] = None,
             datasource: Optional[BaseDatasource] = None,
             pipeline_name: Optional[Text] = None,
             *args,
             **kwargs):
    """
    Construct a base pipeline. This is a base interface that is meant
    to be overridden in multiple other pipeline use cases.

    Args:
        name: Outward-facing name of the pipeline.
        enable_cache: Boolean, indicates whether or not caching
            should be used.
        steps_dict: Optional dict of steps.
        backends_dict: Optional dict of backends. If None, the default
            backends are used.
        metadata_store: Configured metadata store. If None, the default
            metadata store is used.
        artifact_store: Configured artifact store. If None, the default
            artifact store is used.
        datasource: Optional datasource for the pipeline. If None, no
            datasource is attached.
        pipeline_name: A unique name that identifies the pipeline after
            it is run. If given, the pipeline is assumed to be loaded
            from an existing config (e.g. YAML) rather than newly created.

    Raises:
        AssertionError: If a new pipeline's generated file name collides
            with a pipeline already present in the repository.
    """
    self.name = name

    # Metadata store: explicit argument wins, otherwise repo default.
    if metadata_store:
        self.metadata_store: ZenMLMetadataStore = metadata_store
    else:
        # use default
        self.metadata_store: ZenMLMetadataStore = \
            Repository.get_instance().get_default_metadata_store()

    if pipeline_name:
        # This means its been loaded in through YAML, try to get context.
        # A pipeline that already ran (is in the metadata store) must not
        # be mutated and re-run; one that never ran can be executed.
        if self.is_executed_in_metadata_store:
            self._immutable = True
            logger.debug(f'Pipeline {name} loaded and is immutable.')
        else:
            # if metadata store does not have the pipeline_name, then we
            # can safely execute this again.
            self._immutable = False
            logger.debug(f'Pipeline {name} loaded and can be run.')
        self.pipeline_name = pipeline_name
        self.file_name = self.pipeline_name + '.yaml'
    else:
        # if pipeline_name is None then its a new pipeline
        self._immutable = False
        self.pipeline_name = self.create_pipeline_name_from_name()
        self.file_name = self.pipeline_name + '.yaml'
        # check duplicates here as its a 'new' pipeline
        if self.file_name in \
                Repository.get_instance().get_pipeline_file_paths(
                    only_file_names=True):
            raise AssertionError(
                f'Pipeline names must be unique in the repository. There '
                f'is already a pipeline called {self.name}')
        track(event=CREATE_PIPELINE)
        logger.info(f'Pipeline {name} created.')

    self.enable_cache = enable_cache

    # Steps
    if steps_dict is None:
        self.steps_dict: Dict[Text, BaseStep] = {}
    else:
        self.steps_dict = steps_dict

    # Backends
    if backends_dict is None:
        self.backends_dict: Dict[Text, BaseBackend] = \
            self.get_default_backends()
    else:
        self.backends_dict = backends_dict

    # Artifact store: explicit argument wins, otherwise repo default.
    if artifact_store:
        self.artifact_store = artifact_store
    else:
        # use default
        self.artifact_store = \
            Repository.get_instance().get_default_artifact_store()

    # Datasource
    if datasource:
        self.datasource = datasource
    else:
        self.datasource = None
def __init__(self,
             name: Text = None,
             enable_cache: Optional[bool] = True,
             steps_dict: Dict[Text, BaseStep] = None,
             backend: OrchestratorBaseBackend = None,
             metadata_store: Optional[ZenMLMetadataStore] = None,
             artifact_store: Optional[ArtifactStore] = None,
             datasource: Optional[BaseDatasource] = None,
             pipeline_name: Optional[Text] = None,
             *args,
             **kwargs):
    """
    Construct a base pipeline. This is a base interface meant to be
    overridden by concrete pipeline implementations.

    Args:
        name: Outward-facing name of the pipeline. Defaults to a
            millisecond timestamp when omitted.
        enable_cache: Boolean, indicates whether or not caching
            should be used.
        steps_dict: Optional dict of steps.
        backend: Orchestrator backend; defaults to a local one.
        metadata_store: Configured metadata store. If None, the default
            metadata store is used.
        artifact_store: Configured artifact store. If None, the default
            artifact store is used.
        datasource: Optional datasource for the pipeline.
        pipeline_name: A unique name that identifies the pipeline after
            it is run. If given, the pipeline is treated as loaded from
            existing config rather than newly created.
    """
    # Auto-generate a name from the current time if none was supplied.
    if name is None:
        name = str(round(time.time() * 1000))
    self.name = name
    self._immutable = False

    # Explicit metadata store wins; otherwise fall back to the repo default.
    self.metadata_store: ZenMLMetadataStore = (
        metadata_store
        if metadata_store
        else Repository.get_instance().get_default_metadata_store()
    )

    if pipeline_name:
        # Loaded through YAML: reuse the persisted unique name.
        self.pipeline_name = pipeline_name
        self.file_name = self.pipeline_name + '.yaml'
    else:
        # Brand-new pipeline: derive a unique name, refuse duplicates,
        # and record the creation event.
        self.pipeline_name = self.create_pipeline_name_from_name()
        self.file_name = self.pipeline_name + '.yaml'
        self._check_registered()
        track(event=CREATE_PIPELINE)
        logger.info(f'Pipeline {name} created.')

    self.enable_cache = enable_cache

    self.steps_dict: Dict[Text, BaseStep] = \
        {} if steps_dict is None else steps_dict

    # Default to the local orchestrator when no backend is given.
    self.backend = OrchestratorBaseBackend() if backend is None else backend

    # Explicit artifact store wins; otherwise fall back to the repo default.
    self.artifact_store = (
        artifact_store
        if artifact_store
        else Repository.get_instance().get_default_artifact_store()
    )

    self.datasource = datasource if datasource else None

    # Fully-qualified import path of the concrete subclass, used to
    # re-resolve this pipeline from serialized config.
    self._source = source_utils.resolve_source_path(
        self.__class__.__module__ + '.' + self.__class__.__name__)
    self._kwargs = {
        keys.PipelineDetailKeys.NAME: self.pipeline_name,
        keys.PipelineDetailKeys.ENABLE_CACHE: self.enable_cache,
    }
    if kwargs:
        self._kwargs.update(kwargs)