def __init__(self, lakefs_conn_id: str, repo: str, branch: str, **kwargs: Any) -> None:
    """
    Record the lakeFS coordinates and prepare the hook used to query them.

    :param lakefs_conn_id: Connection id handed to ``LakeFSHook``.
    :type lakefs_conn_id: str
    :param repo: The lakeFS repo.
    :type repo: str
    :param branch: The lakeFS branch.
    :type branch: str
    :param kwargs: Remaining keyword arguments, forwarded unchanged to the
        parent constructor.
    """
    # The parent constructor runs first, before any attribute is set here.
    super().__init__(**kwargs)

    # No commit id has been recorded at construction time.
    self.prev_commit_id = None

    # Where to look.
    self.repo = repo
    self.branch = branch

    # How to connect: keep the raw connection id next to the hook built from it.
    self.lakefs_conn_id = lakefs_conn_id
    self.hook = LakeFSHook(lakefs_conn_id)
def execute(self, context: Dict[str, Any]) -> Any:
    """
    Create ``self.branch`` in ``self.repo`` starting from ``self.source_branch``.

    :param context: Task context passed by the caller; this method does not read it.
    :type context: Dict[str, Any]
    :return: Whatever ``LakeFSHook.create_branch`` returns for the new branch.
    """
    # A fresh hook is built for every run from the stored connection id.
    lakefs_hook = LakeFSHook(lakefs_conn_id=self.lakefs_conn_id)

    self.log.info(
        "Create lakeFS branch '%s' in repo '%s' from source '%s'",
        self.branch,
        self.repo,
        self.source_branch,
    )

    return lakefs_hook.create_branch(self.repo, self.branch, self.source_branch)
def execute(self, context: Dict[str, Any]) -> Any:
    """
    Commit the current state of ``self.branch`` in ``self.repo``.

    The task id is written into ``self.metadata`` under the
    ``"airflow_task_id"`` key before the commit is made, so the same
    (mutated) mapping is what gets passed to the hook.

    :param context: Task context passed by the caller; this method does not read it.
    :type context: Dict[str, Any]
    :return: Whatever ``LakeFSHook.commit`` returns for the new commit.
    """
    hook = LakeFSHook(lakefs_conn_id=self.lakefs_conn_id)

    self.log.info("Committing to lakeFS branch '%s' in repo '%s'", self.branch, self.repo)

    # Plain subscript assignment instead of calling the ``__setitem__`` dunder
    # directly; ``self.metadata`` is still updated in place as before.
    self.metadata["airflow_task_id"] = self.task_id

    return hook.commit(self.repo, self.branch, self.msg, self.metadata)
def execute(self, context: Dict[str, Any]) -> Any:
    """
    Merge ``self.source_ref`` into ``self.destination_branch`` in ``self.repo``.

    The task id is written into ``self.metadata`` under the
    ``"airflow_task_id"`` key before the merge is requested, so the same
    (mutated) mapping is what gets passed to the hook.

    :param context: Task context passed by the caller; this method does not read it.
    :type context: Dict[str, Any]
    :return: Whatever ``LakeFSHook.merge`` returns for the merge.
    """
    hook = LakeFSHook(lakefs_conn_id=self.lakefs_conn_id)

    self.log.info(
        "Merging to lakeFS branch '%s' in repo '%s' from source ref '%s'",
        self.destination_branch,
        self.repo,
        self.source_ref,
    )

    # Plain subscript assignment instead of calling the ``__setitem__`` dunder
    # directly; ``self.metadata`` is still updated in place as before.
    self.metadata["airflow_task_id"] = self.task_id

    return hook.merge(
        self.repo,
        self.source_ref,
        self.destination_branch,
        self.msg,
        self.metadata,
    )
class LakeFSFileSensor(BaseSensorOperator):
    """
    Sensor that reports success once a path can be stat-ed on a lakeFS branch.

    :param lakefs_conn_id: The connection to run the sensor against
    :type lakefs_conn_id: str
    :param repo: The lakeFS repo.
    :type repo: str
    :param branch: The branch to look at.
    :type branch: str
    :param path: The path to wait for.
    :type path: str
    """

    # Arguments that may be rendered with jinja templating
    template_fields = ['repo', 'branch', 'path']

    @apply_defaults
    def __init__(self, lakefs_conn_id: str, repo: str, branch: str, path: str, **kwargs: Any) -> None:
        """Store the target location and build the hook used by :meth:`poke`."""
        super().__init__(**kwargs)

        # Location of the object being waited for.
        self.repo = repo
        self.branch = branch
        self.path = path

        # Connection id and the hook created from it.
        self.lakefs_conn_id = lakefs_conn_id
        self.hook = LakeFSHook(lakefs_conn_id)

    def poke(self, context: Dict[Any, Any]) -> bool:
        """
        Check once whether the object exists.

        :param context: Task context passed by the caller; not read here.
        :return: ``True`` if ``stat_object`` succeeds, ``False`` if it raises
            ``NotFoundException``. Any other exception propagates.
        """
        try:
            self.hook.stat_object(self.repo, self.branch, self.path)
        except NotFoundException:
            self.log.info("File '%s' not found on branch '%s'", self.path, self.branch)
            return False

        self.log.info("Found file '%s' on branch '%s'", self.path, self.branch)
        return True
class LakeFSCommitSensor(BaseSensorOperator):
    """
    Sensor that reports success once the commit id of a lakeFS branch differs
    from a previously recorded one.

    :param lakefs_conn_id: The connection to run the sensor against
    :type lakefs_conn_id: str
    :param repo: The lakeFS repo.
    :type repo: str
    :param branch: The branch to watch
    :type branch: str
    """

    # Arguments that may be rendered with jinja templating
    template_fields = ['repo', 'branch']

    # Context key from which a baseline commit id is read, if present.
    current_commit_id_key = 'current_commit_id'
    branch_not_found_error = "Resource Not Found"

    @apply_defaults
    def __init__(self, lakefs_conn_id: str, repo: str, branch: str, **kwargs: Any) -> None:
        """Store the branch coordinates, build the hook and clear the baseline."""
        super().__init__(**kwargs)

        # Baseline commit id; filled in lazily by the first poke.
        self.prev_commit_id = None

        self.repo = repo
        self.branch = branch

        self.lakefs_conn_id = lakefs_conn_id
        self.hook = LakeFSHook(lakefs_conn_id)

    def poke(self, context: Dict[Any, Any]) -> bool:
        """
        Compare the branch's current commit id with the recorded baseline.

        When no baseline is known yet, it is taken from ``context`` (under
        ``current_commit_id_key``) or, failing that, from the branch itself;
        in the latter case this poke only records the baseline and returns
        ``False``.

        :param context: Mapping that may carry a baseline commit id.
        :return: ``True`` only when the branch exists and its commit id
            differs from the baseline.
        """
        if self.prev_commit_id is None:
            self.prev_commit_id = context.get(self.current_commit_id_key)
            if self.prev_commit_id is None:
                # Nothing supplied by the context: record the branch's present
                # commit (None if the branch is missing) and wait for the next poke.
                baseline_id, _ = self.get_commit()
                self.prev_commit_id = baseline_id
                return False

        self.log.info('Poking: branch %s on repo %s', self.branch, self.repo)

        latest_id, found = self.get_commit()
        if found:
            self.log.info('Previous ref: %s, current ref %s', self.prev_commit_id, latest_id)
            return latest_id != self.prev_commit_id
        return False

    # NOTE(review): ``(str, bool)`` is a tuple of classes rather than a typing
    # construct, and the first element is None when the branch is missing.
    # Left as-is to keep the signature untouched.
    def get_commit(self) -> (str, bool):
        """
        Look up the commit id the branch currently points at.

        :return: ``(commit_id, True)`` when the lookup succeeds, or
            ``(None, False)`` when the hook raises ``NotFoundException``.
        """
        try:
            return self.hook.get_branch_commit_id(self.repo, self.branch), True
        except NotFoundException:
            self.log.info("Branch '%s' not found in repo '%s'", self.branch, self.repo)
            return None, False