from contextlib import ContextDecorator
from typing import Dict, Optional, Union

from pyspark.sql import SparkSession

# SparkCluster and parse_memory_size are assumed to be provided by the
# surrounding project; their import paths are not shown in this excerpt.


class _SparkContext(ContextDecorator):
    """A class used to create the Spark cluster and get the Spark session.

    :param app_name: the Spark application name
    :param num_executors: the number of executors requested
    :param executor_cores: the number of CPU cores for each executor
    :param executor_memory: the memory size for each executor (either a byte
           count or a human-readable string such as 10KB or 10MB)
    :param configs: extra Spark configs that need to be set
    """

    def __init__(self,
                 app_name: str,
                 num_executors: int,
                 executor_cores: int,
                 executor_memory: Union[str, int],
                 configs: Optional[Dict[str, str]] = None):
        self._app_name = app_name
        self._num_executors = num_executors
        self._executor_cores = executor_cores

        if isinstance(executor_memory, str):
            # If this is a human-readable string (like 10KB, 10MB), parse it
            # into a byte count.
            executor_memory = parse_memory_size(executor_memory)

        self._executor_memory = executor_memory
        self._configs = {} if configs is None else configs

        self._spark_cluster: Optional[SparkCluster] = None
        self._spark_session: Optional[SparkSession] = None

    def _get_or_create_spark_cluster(self) -> SparkCluster:
        if self._spark_cluster is not None:
            return self._spark_cluster
        self._spark_cluster = SparkCluster()
        return self._spark_cluster

    def get_or_create_session(self) -> SparkSession:
        if self._spark_session is not None:
            return self._spark_session
        spark_cluster = self._get_or_create_spark_cluster()
        self._spark_session = spark_cluster.get_spark_session(
            self._app_name,
            self._num_executors,
            self._executor_cores,
            self._executor_memory,
            self._configs)
        return self._spark_session

    def stop(self):
        # Stop the session before tearing down the cluster that hosts it.
        if self._spark_session is not None:
            self._spark_session.stop()
            self._spark_session = None
        if self._spark_cluster is not None:
            self._spark_cluster.stop()
            self._spark_cluster = None

    def __enter__(self):
        # Return the session so `with _SparkContext(...) as spark:` binds it.
        return self.get_or_create_session()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()
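# --- Usage sketch (illustrative, not part of the original module) ---
# Because _SparkContext subclasses ContextDecorator, it works both as a
# context manager and as a function decorator. The function name, app names,
# and argument values below are hypothetical.

def _example_usage():
    # As a context manager: the session is created on entry, and both the
    # session and the cluster are stopped on exit, even if the body raises.
    with _SparkContext("example-app", num_executors=2, executor_cores=2,
                       executor_memory="2GB") as spark:
        spark.range(100).count()

    # As a decorator: each call to the wrapped function runs inside the same
    # setup/teardown.
    @_SparkContext("example-job", num_executors=2, executor_cores=2,
                   executor_memory="2GB",
                   configs={"spark.sql.shuffle.partitions": "8"})
    def run_job():
        # Inside the decorated call, the active session can be retrieved
        # through the standard PySpark builder.
        SparkSession.builder.getOrCreate().range(10).count()

    run_job()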
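# The constructor accepts either a raw byte count or a human-readable string
# for executor_memory. For illustration only, here is a hypothetical sketch of
# what a parse_memory_size helper could look like; the project's actual helper
# may behave differently.

import re

_MEMORY_UNITS = {"B": 1, "KB": 1024, "MB": 1024 ** 2,
                 "GB": 1024 ** 3, "TB": 1024 ** 4}


def _parse_memory_size_sketch(memory: str) -> int:
    """Hypothetical: convert a string like '10KB' or '2GB' to a byte count."""
    match = re.fullmatch(r"\s*(\d+(?:\.\d+)?)\s*([A-Za-z]+)\s*", memory)
    if match is None:
        raise ValueError(f"Cannot parse memory size: {memory!r}")
    value, unit = match.groups()
    if unit.upper() not in _MEMORY_UNITS:
        raise ValueError(f"Unknown memory unit: {unit!r}")
    return int(float(value) * _MEMORY_UNITS[unit.upper()])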