def update_datajob_stack_resources(self, resource: object) -> None:
    """Add a DataJob resource to the DataJob stack resources variable.

    Args:
        resource: A DataJobBase implementation. We cannot reference it
            explicitly in the typing, because that would create a circular
            dependency.

    Returns:
        None
    """
    logger.info(f"adding job {resource} to stack workflow resources")
    self.resources.append(resource)
def __init__(self, datajob_stack, name, **kwargs):
    super().__init__(datajob_stack, name, **kwargs)
    assert isinstance(
        datajob_stack, DataJobStack
    ), f"we expect the scope argument to be of type {DataJobStack}"
    self.name = name
    self.project_root = datajob_stack.project_root
    self.stage = datajob_stack.stage
    self.unique_name = f"{datajob_stack.unique_stack_name}-{self.name}"
    self.context = datajob_stack.context
    logger.info(f"adding job {self} to stack workflow resources")
    datajob_stack.resources.append(self)
def __init__(
    self,
    datajob_stack: core.Construct,
    name: str,
    job_path: str,
    job_type: str = GlueJobType.PYTHONSHELL.value,
    glue_version: str = None,
    max_capacity: int = None,
    arguments: dict = None,
    python_version: str = "3",
    role: iam.Role = None,
    worker_type: str = None,
    number_of_workers: int = None,
    *args,
    **kwargs,
):
    """
    :param datajob_stack: aws cdk core construct object.
    :param name: a name for this glue job (will appear on the glue console).
    :param job_path: the path to the glue job relative to the project root.
    :param job_type: choose pythonshell for plain python / glueetl for a spark cluster. pythonshell is the default.
    :param glue_version: at the time of writing, choose 1.0 for pythonshell / 2.0 for spark.
    :param max_capacity: the maximum number of nodes we want to run.
    :param arguments: the arguments for this glue job as a dict.
    :param python_version: 3 is the default.
    :param role: you can provide a cdk iam role object as an argument. if not provided, this class will instantiate a role.
    :param worker_type: you can provide a worker type: Standard / G.1X / G.2X.
    :param number_of_workers: for pythonshell this is 0.0625 or 1; for glueetl this is a minimum of 2.
    :param args: any extra args for the glue.CfnJob.
    :param kwargs: any extra kwargs for the glue.CfnJob.
    """
    logger.info(f"creating glue job {name}")
    super().__init__(datajob_stack, name, **kwargs)
    self.job_path = GlueJob._get_job_path(self.project_root, job_path)
    self.arguments = arguments or {}
    self.job_type = GlueJob._get_job_type(job_type=job_type)
    self.python_version = python_version
    self.glue_version = GlueJob._get_glue_version(
        glue_version=glue_version, job_type=job_type
    )
    self.max_capacity = max_capacity
    self.role = self._get_role(role, self.unique_name)
    self.worker_type = worker_type
    self.number_of_workers = number_of_workers
    self.args = args
    self.kwargs = kwargs
    logger.info(f"glue job {name} created.")
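# Usage sketch (not part of the source above): declaring a GlueJob inside a
# DataJobStack. Only the GlueJob parameters are taken from the signature
# above; the DataJobStack constructor arguments (scope, id) and the cdk `app`
# object are assumptions for illustration.
app = core.App()
datajob_stack = DataJobStack(scope=app, id="data-pipeline")  # assumed constructor args
task1 = GlueJob(
    datajob_stack=datajob_stack,
    name="task1",
    job_path="glue_jobs/task1.py",
    job_type="pythonshell",
)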
def _execute_packaging_logic(project_root: str, config_file: str, cmd: str) -> None:
    """
    check if the config file exists in the project root and execute the
    command to create a wheel.

    :param project_root: the path to the root of your project.
    :param config_file: the config file to package the project as a wheel
        (setup.py or pyproject.toml).
    :param cmd: the command to execute to create a wheel.
    :return: None
    """
    config_file_full_path = Path(project_root, config_file)
    logger.info(f"expecting {config_file_full_path}")
    if not config_file_full_path.is_file():
        raise DatajobPackageWheelError(
            f"no {config_file} file detected in project root {project_root}. "
            f"Hence we cannot create a python wheel for this project"
        )
    logger.debug(f"found a {config_file} file in {project_root}")
    call_subprocess(cmd=cmd)
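# Illustrative call (not from the source): packaging a setup.py based project.
# The command string is an assumption; the function only checks that the given
# config file exists in the project root before running `cmd`.
_execute_packaging_logic(
    project_root="/path/to/project",
    config_file="setup.py",
    cmd="python setup.py bdist_wheel",
)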
def __init__(
    self,
    scope: core.Construct,
    unique_stack_name: str,
    project_root: str = None,
    include_folder: str = None,
    **kwargs,
) -> None:
    """
    :param scope: aws cdk core construct object.
    :param unique_stack_name: a unique name for this stack, so that the names
        of our resources do not collide with other deployments.
    :param project_root: the path to the root of this project.
    :param include_folder: specify the name of the folder we would like to
        include in the deployment bucket.
    """
    logger.info("creating datajob context.")
    super().__init__(scope, unique_stack_name, **kwargs)
    self.project_root = project_root
    self.unique_stack_name = unique_stack_name
    (
        self.deployment_bucket,
        self.deployment_bucket_name,
    ) = self._create_deployment_bucket(self.unique_stack_name)
    (self.data_bucket, self.data_bucket_name) = self._create_data_bucket(
        self.unique_stack_name
    )
    self.s3_url_wheel = None
    if self.project_root:
        self.s3_url_wheel = self._deploy_wheel(
            self.unique_stack_name,
            self.project_root,
            self.deployment_bucket,
            self.deployment_bucket_name,
        )
    if include_folder:
        self._deploy_local_folder(include_folder)
    logger.info("datajob context created.")
def __init__(
    self,
    scope: core.Construct,
    project_root: str = None,
    include_folder: str = None,
    **kwargs,
) -> None:
    """
    :param scope: aws cdk core construct object (the DataJobStack); its
        unique_stack_name and stage are reused here.
    :param project_root: the path to the root of this project.
    :param include_folder: specify the name of the folder we would like to
        include in the deployment bucket.
    """
    logger.info("creating datajob context.")
    self.unique_stack_name = scope.unique_stack_name
    super().__init__(scope, self.unique_stack_name, **kwargs)
    self.stage = scope.stage
    self.bucket_suffix = None
    self.project_root = project_root
    (
        self.deployment_bucket,
        self.deployment_bucket_name,
    ) = self._create_deployment_bucket()
    (self.data_bucket, self.data_bucket_name) = self._create_data_bucket()
    self.s3_url_wheel = None
    if self.project_root:
        self.s3_url_wheel = self._deploy_wheel(
            self.unique_stack_name,
            self.project_root,
            self.deployment_bucket,
            self.deployment_bucket_name,
        )
    if include_folder:
        self._deploy_local_folder(include_folder)
    logger.info("datajob context created.")
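# Sketch (not shown above): with this newer constructor the DataJobContext
# only needs the enclosing stack, since unique_stack_name and stage are read
# from `scope`. The `datajob_stack` variable is assumed to be a DataJobStack
# instance created earlier.
context = DataJobContext(
    scope=datajob_stack,
    project_root="/path/to/project",
    include_folder="glue_jobs",
)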
def __exit__(self, exc_type, exc_value, traceback) -> None:
    """steps we have to do when exiting the context manager."""
    self.build_workflow()
    _set_workflow(None)
    logger.info(f"step functions workflow {self.unique_name} created")
def __enter__(self):
    """first steps we have to do when entering the context manager."""
    logger.info(f"creating step functions workflow for {self.unique_name}")
    _set_workflow(self)
    return self
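# Usage sketch: __enter__/__exit__ above let the workflow act as a context
# manager, so resources declared inside the `with` block are registered via
# _set_workflow(self) and wired together by build_workflow() on exit. The
# class name StepfunctionsWorkflow, its constructor arguments, and the
# task-ordering operator are assumptions for illustration.
with StepfunctionsWorkflow(datajob_stack=datajob_stack, name="workflow") as wf:
    task1 >> task2  # assumed ordering operator between two jobs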