Example #1
    def __init__(self, sc=None, appName="Hail", master=None, local='local[*]',
                 log='hail.log', quiet=False, append=False, parquet_compression='uncompressed',
                 block_size=1, branching_factor=50, tmp_dir='/tmp'):
        from pyspark import SparkContext
        # Make sure the Py4J gateway and JVM are up before touching them.
        SparkContext._ensure_initialized()

        self._gateway = SparkContext._gateway
        self._jvm = SparkContext._jvm

        # Share the gateway and JVM handles with Hail's global Env.
        Env._jvm = self._jvm
        Env._gateway = self._gateway

        # The 'is.hail' JVM package ('is' is a Python keyword, hence getattr).
        self._hail = getattr(self._jvm, 'is').hail

        driver = scala_package_object(self._hail.driver)

        if not sc:
            # No SparkContext supplied: let the Hail driver create and configure
            # one, then wrap the resulting Scala SparkContext for use from Python.
            self._jsc = driver.configureAndCreateSparkContext(
                appName, joption(master), local, parquet_compression, block_size)
            self.sc = SparkContext(gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
        else:
            # Reuse the caller's SparkContext.
            self.sc = sc
            # sc._jsc is a JavaSparkContext; .sc() unwraps the Scala SparkContext.
            self._jsc = sc._jsc.sc()

        driver.configureHail(branching_factor, tmp_dir)
        driver.configureLogging(log, quiet, append)

        # Create the JVM-side SQLContext and its PySpark wrapper.
        self._jsql_context = driver.createSQLContext(self._jsc)
        self._sql_context = SQLContext(self.sc, self._jsql_context)
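
This constructor appears to be Hail 0.1's HailContext.__init__ (Env, scala_package_object, and joption are Hail-internal Py4J helpers, and SQLContext is PySpark's). A minimal usage sketch, assuming the class is exposed as hail.HailContext as in the 0.1 API:

# A minimal sketch, assuming Hail 0.1's API; parameter names follow the
# signature shown above.
from hail import HailContext

# Let Hail create and configure its own local SparkContext ...
hc = HailContext(appName='Hail', local='local[4]', tmp_dir='/tmp/hail')

# ... or attach to a SparkContext you already manage:
# from pyspark import SparkContext
# sc = SparkContext(master='local[4]', appName='Hail')
# hc = HailContext(sc=sc)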
Example #2
    def export_mongodb(self, mode='append'):
        """Export to MongoDB."""
        # Delegate to the JVM-side driver, passing the shared SQLContext and
        # this table's underlying Java KeyTable.
        (scala_package_object(self.hc._hail.driver).exportMongoDB(
            self.hc._jsql_context, self._jkt, mode))
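
In Hail 0.1 this method belongs to KeyTable, and the export relies on a MongoDB Spark connector being available and configured on the Spark classpath. A hedged usage sketch (the input file and connector setup are hypothetical):

# Hypothetical usage, assuming Hail 0.1's KeyTable API and a MongoDB Spark
# connector configured with an output URI, database, and collection.
from hail import HailContext

hc = HailContext()
kt = hc.import_table('samples.tsv', impute=True)  # build a KeyTable
kt.export_mongodb(mode='append')  # append rows to the configured collection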