Example #1
0
class _SparkContext(ContextDecorator):
    """A class used to create the Spark cluster and get the Spark session.

    :param app_name the Spark application name
    :param num_executors the number of executor requested
    :param executor_cores the CPU cores for each executor
    :param executor_memory the memory size (eg: 10KB, 10MB..) for each executor
    :param configs the extra Spark configs need to set
    """
    def __init__(self,
                 app_name: str,
                 num_executors: int,
                 executor_cores: int,
                 executor_memory: Union[str, int],
                 configs: Dict[str, str] = None):
        self._app_name = app_name
        self._num_executors = num_executors
        self._executor_cores = executor_cores

        if isinstance(executor_memory, str):
            # If this is human readable str(like: 10KB, 10MB..), parse it
            executor_memory = parse_memory_size(executor_memory)

        self._executor_memory = executor_memory
        self._configs = {} if configs is None else configs

        self._spark_cluster: Optional[SparkCluster] = None
        self._spark_session: Optional[SparkSession] = None

    def _get_or_create_spark_cluster(self) -> SparkCluster:
        if self._spark_cluster is not None:
            return self._spark_cluster
        self._spark_cluster = SparkCluster()
        return self._spark_cluster

    def get_or_create_session(self):
        if self._spark_session is not None:
            return self._spark_session
        spark_cluster = self._get_or_create_spark_cluster()
        self._spark_session = spark_cluster.get_spark_session(
            self._app_name,
            self._num_executors,
            self._executor_cores,
            self._executor_memory,
            self._configs)
        return self._spark_session

    def stop(self):
        if self._spark_session is not None:
            self._spark_session.stop()
            self._spark_session = None
        if self._spark_cluster is not None:
            self._spark_cluster.stop()
            self._spark_cluster = None

    def __enter__(self):
        self.get_or_create_session()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()
Example #2
0
 def _get_or_create_spark_cluster(self) -> SparkCluster:
     if self._spark_cluster is not None:
         return self._spark_cluster
     self._spark_cluster = SparkCluster()
     return self._spark_cluster