def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
             log=None, quiet=False, append=False, min_block_size=1,
             branching_factor=50, tmp_dir=None, default_reference="GRCh37",
             idempotent=False, global_seed=6348563392232659379, _backend=None):
    """Initialize the Hail runtime.

    Starts (or attaches to) a Spark JVM, constructs the JVM-side
    ``HailContext``, and registers this object as the process-wide
    context in ``Env._hc``.  Statement order matters throughout: global
    registration is deliberately deferred until the JVM context exists.

    :param sc: existing ``SparkContext`` to attach to; when ``None`` a
        new one is created from ``master``/``local``.
    :param log: log file path; defaults to a timestamped
        ``hail-<version>.log`` in the current working directory.
    :param idempotent: when True, a second initialization returns
        silently instead of raising.
    :param _backend: private backend override; defaults to ``SparkBackend``.
    :raises FatalError: if Hail was already initialized and
        ``idempotent`` is False.
    :raises RuntimeError: if the JAR version and Python library version
        disagree.
    """
    if Env._hc:
        if idempotent:
            return
        else:
            raise FatalError(
                'Hail has already been initialized, restart session '
                'or stop Hail to change configuration.')

    # A pip-installed Hail ships its JAR inside the package; if present,
    # put it on the driver and executor classpaths before Spark starts.
    if pkg_resources.resource_exists(__name__, "hail-all-spark.jar"):
        hail_jar_path = pkg_resources.resource_filename(
            __name__, "hail-all-spark.jar")
        assert os.path.exists(hail_jar_path), f'{hail_jar_path} does not exist'
        sys.stderr.write(f'using hail jar at {hail_jar_path}\n')
        conf = SparkConf()
        conf.set('spark.driver.extraClassPath', hail_jar_path)
        conf.set('spark.executor.extraClassPath', hail_jar_path)
        SparkContext._ensure_initialized(conf=conf)
    else:
        SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package: the JVM package is 'is.hail', and 'is' is a Python
    # keyword, so plain attribute access won't parse — getattr is required.
    self._hail = getattr(self._jvm, 'is').hail

    self._warn_cols_order = True
    self._warn_entries_order = True

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    # Java SparkContext of the supplied context, or None to let the JVM
    # HailContext create one.
    jsc = sc._jsc.sc() if sc else None

    if _backend is None:
        _backend = SparkBackend()
    self._backend = _backend

    tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')

    version = read_version_info()
    hail.__version__ = version

    if log is None:
        log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                        suffix=f'-{version}.log')
    self._log = log

    # we always pass 'quiet' to the JVM because stderr output needs
    # to be routed through Python separately.
    if idempotent:
        self._jhc = self._hail.HailContext.getOrCreate(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)
    else:
        self._jhc = self._hail.HailContext.apply(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)

    self._jsc = self._jhc.sc()
    # Wrap the JVM-created SparkContext when the caller didn't supply one.
    self.sc = sc if sc else SparkContext(
        gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jsql_context = self._jhc.sqlContext()
    self._sql_context = SQLContext(self.sc, jsqlContext=self._jsql_context)

    super(HailContext, self).__init__()

    # do this at the end in case something errors, so we don't raise the
    # above error without a real HC
    Env._hc = self

    self._default_ref = None
    Env.hail().variant.ReferenceGenome.setDefaultReference(
        self._jhc, default_reference)

    jar_version = self._jhc.version()

    if jar_version != version:
        raise RuntimeError(
            f"Hail version mismatch between JAR and Python library\n"
            f" JAR: {jar_version}\n"
            f" Python: {version}")

    if not quiet:
        sys.stderr.write('Running on Apache Spark version {}\n'.format(
            self.sc.version))
        if self._jsc.uiWebUrl().isDefined():
            sys.stderr.write('SparkUI available at {}\n'.format(
                self._jsc.uiWebUrl().get()))

        connect_logger('localhost', 12888)

        self._hail.HailContext.startProgressBar(self._jsc)

        sys.stderr.write(
            'Welcome to\n'
            ' __ __ <>__\n'
            ' / /_/ /__ __/ /\n'
            ' / __ / _ `/ / /\n'
            ' /_/ /_/\\_,_/_/_/ version {}\n'.format(version))

        if version.startswith('devel'):
            sys.stderr.write(
                'NOTE: This is a beta version. Interfaces may change\n'
                ' during the beta period. We recommend pulling\n'
                ' the latest changes weekly.\n')
        sys.stderr.write(f'LOGGING: writing to {log}\n')

    install_exception_handler()
    Env.set_seed(global_seed)
def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
             log=None, quiet=False, append=False, min_block_size=1,
             branching_factor=50, tmp_dir=None, default_reference="GRCh37",
             idempotent=False, global_seed=6348563392232659379, _backend=None):
    """Initialize the Hail runtime.

    Starts (or attaches to) a Spark JVM, constructs the JVM-side
    ``HailContext``, and registers this object as the process-wide
    context in ``Env._hc``.  Statement order matters: the global
    registration happens only after the JVM context is built.

    :param sc: existing ``SparkContext`` to attach to; when ``None`` a
        new one is created from ``master``/``local``.
    :param log: log file path; defaults to a timestamped
        ``hail-<version>.log`` in the current working directory.
    :param idempotent: when True, a second initialization returns
        silently instead of raising.
    :param _backend: private backend override; defaults to ``SparkBackend``.
    :raises FatalError: if Hail was already initialized and
        ``idempotent`` is False.
    :raises RuntimeError: if the JAR version and Python library version
        disagree.
    """
    if Env._hc:
        if idempotent:
            return
        else:
            raise FatalError(
                'Hail has already been initialized, restart session '
                'or stop Hail to change configuration.')

    # A pip-installed Hail ships its JAR inside the package; if present,
    # put it on the driver and executor classpaths before Spark starts.
    if pkg_resources.resource_exists(__name__, "hail-all-spark.jar"):
        hail_jar_path = pkg_resources.resource_filename(
            __name__, "hail-all-spark.jar")
        assert os.path.exists(hail_jar_path), f'{hail_jar_path} does not exist'
        sys.stderr.write(f'using hail jar at {hail_jar_path}\n')
        conf = SparkConf()
        conf.set('spark.driver.extraClassPath', hail_jar_path)
        conf.set('spark.executor.extraClassPath', hail_jar_path)
        SparkContext._ensure_initialized(conf=conf)
    else:
        SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package: the JVM package is 'is.hail'; 'is' is a Python
    # keyword, so getattr is needed instead of plain attribute access.
    self._hail = getattr(self._jvm, 'is').hail

    self._warn_cols_order = True
    self._warn_entries_order = True

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    # Java SparkContext of the supplied context, or None to let the JVM
    # HailContext create one.
    jsc = sc._jsc.sc() if sc else None

    if _backend is None:
        _backend = SparkBackend()
    self._backend = _backend

    tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')

    version = read_version_info()
    hail.__version__ = version

    if log is None:
        log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                        suffix=f'-{version}.log')
    self._log = log

    # we always pass 'quiet' to the JVM because stderr output needs
    # to be routed through Python separately.
    if idempotent:
        self._jhc = self._hail.HailContext.getOrCreate(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)
    else:
        self._jhc = self._hail.HailContext.apply(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)

    self._jsc = self._jhc.sc()
    # Wrap the JVM-created SparkContext when the caller didn't supply one.
    self.sc = sc if sc else SparkContext(
        gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jsql_context = self._jhc.sqlContext()
    self._sql_context = SQLContext(self.sc, jsqlContext=self._jsql_context)

    super(HailContext, self).__init__()

    # do this at the end in case something errors, so we don't raise the
    # above error without a real HC
    Env._hc = self

    self._default_ref = None
    Env.hail().variant.ReferenceGenome.setDefaultReference(
        self._jhc, default_reference)

    jar_version = self._jhc.version()

    if jar_version != version:
        raise RuntimeError(
            f"Hail version mismatch between JAR and Python library\n"
            f" JAR: {jar_version}\n"
            f" Python: {version}")

    if not quiet:
        sys.stderr.write('Running on Apache Spark version {}\n'.format(
            self.sc.version))
        if self._jsc.uiWebUrl().isDefined():
            sys.stderr.write('SparkUI available at {}\n'.format(
                self._jsc.uiWebUrl().get()))

        connect_logger('localhost', 12888)

        self._hail.HailContext.startProgressBar(self._jsc)

        sys.stderr.write(
            'Welcome to\n'
            ' __ __ <>__\n'
            ' / /_/ /__ __/ /\n'
            ' / __ / _ `/ / /\n'
            ' /_/ /_/\\_,_/_/_/ version {}\n'.format(version))

        if version.startswith('devel'):
            sys.stderr.write(
                'NOTE: This is a beta version. Interfaces may change\n'
                ' during the beta period. We recommend pulling\n'
                ' the latest changes weekly.\n')
        sys.stderr.write(f'LOGGING: writing to {log}\n')

    install_exception_handler()
    Env.set_seed(global_seed)
def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
             log='hail.log', quiet=False, append=False, min_block_size=1,
             branching_factor=50, tmp_dir=None, default_reference="GRCh37",
             force_ir=False):
    """Initialize the Hail runtime.

    Attaches to (or starts) a Spark JVM, constructs the JVM-side
    ``HailContext``, and registers this object as the process-wide
    context in ``Env._hc``.  Unlike later variants of this method there
    is no ``idempotent`` option: re-initialization always raises.

    :param sc: existing ``SparkContext`` to attach to; when ``None`` a
        new one is created from ``master``/``local``.
    :param log: log file path (default ``'hail.log'``).
    :param force_ir: forwarded to the JVM ``HailContext`` —
        presumably forces the IR code path; verify against the Scala side.
    :raises FatalError: if Hail was already initialized.
    """
    if Env._hc:
        raise FatalError(
            'Hail has already been initialized, restart session '
            'or stop Hail to change configuration.')

    SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package: the JVM package is 'is.hail'; 'is' is a Python
    # keyword, so getattr is needed instead of plain attribute access.
    self._hail = getattr(self._jvm, 'is').hail

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    # Java SparkContext of the supplied context, or None to let the JVM
    # HailContext create one.
    jsc = sc._jsc.sc() if sc else None

    tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')

    # we always pass 'quiet' to the JVM because stderr output needs
    # to be routed through Python separately.
    self._jhc = self._hail.HailContext.apply(
        jsc, app_name, joption(master), local, log, True, append,
        min_block_size, branching_factor, tmp_dir, force_ir)

    self._jsc = self._jhc.sc()
    # Wrap the JVM-created SparkContext when the caller didn't supply one.
    self.sc = sc if sc else SparkContext(
        gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jsql_context = self._jhc.sqlContext()
    self._sql_context = SQLContext(self.sc, jsqlContext=self._jsql_context)
    # internal counter; incremented elsewhere in the class (not visible here)
    self._counter = 1

    super(HailContext, self).__init__()

    # do this at the end in case something errors, so we don't raise the
    # above error without a real HC
    Env._hc = self

    self._default_ref = None
    Env.hail().variant.ReferenceGenome.setDefaultReference(
        self._jhc, default_reference)

    version = self._jhc.version()
    hail.__version__ = version

    if not quiet:
        sys.stderr.write('Running on Apache Spark version {}\n'.format(
            self.sc.version))
        if self._jsc.uiWebUrl().isDefined():
            sys.stderr.write('SparkUI available at {}\n'.format(
                self._jsc.uiWebUrl().get()))

        connect_logger('localhost', 12888)

        self._hail.HailContext.startProgressBar(self._jsc)

        sys.stderr.write(
            'Welcome to\n'
            ' __ __ <>__\n'
            ' / /_/ /__ __/ /\n'
            ' / __ / _ `/ / /\n'
            ' /_/ /_/\_,_/_/_/ version {}\n'.format(version))

        if version.startswith('devel'):
            sys.stderr.write(
                'NOTE: This is a beta version. Interfaces may change\n'
                ' during the beta period. We recommend pulling\n'
                ' the latest changes weekly.\n')

    install_exception_handler()
def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
             log=None, quiet=False, append=False, min_block_size=1,
             branching_factor=50, tmp_dir=None, default_reference="GRCh37",
             idempotent=False, global_seed=6348563392232659379,
             spark_conf=None, optimizer_iterations=None, _backend=None):
    """Initialize the Hail runtime.

    Starts (or attaches to) a Spark JVM, constructs the JVM-side
    ``HailContext``, selects a backend (``ServiceBackend`` when
    ``HAIL_APISERVER_URL`` is set, otherwise ``SparkBackend``), loads the
    built-in reference genomes, and registers this object as the
    process-wide context in ``Env._hc``.  Statement order matters: the
    global registration happens only after the JVM context is built.

    :param sc: existing ``SparkContext`` to attach to; when ``None`` a
        new one is created from ``master``/``local``.
    :param log: log file path; defaults to a timestamped
        ``hail-<version>.log`` in the current working directory.
    :param idempotent: when True, a second initialization returns
        silently instead of raising.
    :param spark_conf: optional dict of extra Spark configuration
        key/value pairs applied before any Hail-specific settings.
    :param optimizer_iterations: forwarded to the JVM HailContext;
        defaults via ``HAIL_OPTIMIZER_ITERATIONS`` env var, then 3.
    :param _backend: private backend override.
    :raises FatalError: if Hail was already initialized and
        ``idempotent`` is False.
    :raises RuntimeError: if the JAR version and Python library version
        disagree.
    """
    if Env._hc:
        if idempotent:
            return
        else:
            raise FatalError(
                'Hail has already been initialized, restart session '
                'or stop Hail to change configuration.')

    # A pip-installed Hail ships its JAR inside the package; if present,
    # build a SparkConf that puts it (plus the optional spark-monitor
    # listener) on the classpath before Spark starts.
    if pkg_resources.resource_exists(__name__, "hail-all-spark.jar"):
        hail_jar_path = pkg_resources.resource_filename(
            __name__, "hail-all-spark.jar")
        assert os.path.exists(hail_jar_path), f'{hail_jar_path} does not exist'
        conf = SparkConf()

        # user-supplied Spark settings first, then Hail's jar settings
        base_conf = spark_conf or {}
        for k, v in base_conf.items():
            conf.set(k, v)

        jars = [hail_jar_path]

        if os.environ.get('HAIL_SPARK_MONITOR'):
            import sparkmonitor
            # the jar for spark monitor sits next to the installed module
            jars.append(os.path.join(
                os.path.dirname(sparkmonitor.__file__), 'listener.jar'))
            conf.set("spark.extraListeners",
                     "sparkmonitor.listener.JupyterSparkMonitorListener")

        conf.set('spark.jars', ','.join(jars))
        conf.set('spark.driver.extraClassPath', ','.join(jars))
        # executors resolve the jar relative to their own working dir
        conf.set('spark.executor.extraClassPath', './hail-all-spark.jar')
        if sc is None:
            SparkContext._ensure_initialized(conf=conf)
        else:
            # An existing SparkContext can't be reconfigured; the caller
            # must have set these options up front, so only warn.
            import warnings
            warnings.warn(
                'pip-installed Hail requires additional configuration options in Spark referring\n'
                ' to the path to the Hail Python module directory HAIL_DIR,\n'
                ' e.g. /path/to/python/site-packages/hail:\n'
                ' spark.jars=HAIL_DIR/hail-all-spark.jar\n'
                ' spark.driver.extraClassPath=HAIL_DIR/hail-all-spark.jar\n'
                ' spark.executor.extraClassPath=./hail-all-spark.jar')
    else:
        SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package: the JVM package is 'is.hail'; 'is' is a Python
    # keyword, so getattr is needed instead of plain attribute access.
    self._hail = getattr(self._jvm, 'is').hail

    self._warn_cols_order = True
    self._warn_entries_order = True

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    # Java SparkContext of the supplied context, or None to let the JVM
    # HailContext create one.
    jsc = sc._jsc.sc() if sc else None

    if _backend is None:
        if os.environ.get('HAIL_APISERVER_URL') is not None:
            _backend = ServiceBackend()
        else:
            _backend = SparkBackend()
    self._backend = _backend

    tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')
    optimizer_iterations = get_env_or_default(
        optimizer_iterations, 'HAIL_OPTIMIZER_ITERATIONS', 3)

    py_version = version()

    if log is None:
        log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                        suffix=f'-{py_version}.log')
    self._log = log

    # we always pass 'quiet' to the JVM because stderr output needs
    # to be routed through Python separately.
    if idempotent:
        self._jhc = self._hail.HailContext.getOrCreate(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir, optimizer_iterations)
    else:
        self._jhc = self._hail.HailContext.apply(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir, optimizer_iterations)

    self._jsc = self._jhc.sc()
    # Wrap the JVM-created SparkContext when the caller didn't supply one.
    self.sc = sc if sc else SparkContext(
        gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jspark_session = self._jhc.sparkSession()
    self._spark_session = SparkSession(self.sc, self._jhc.sparkSession())

    super(HailContext, self).__init__()

    # do this at the end in case something errors, so we don't raise the
    # above error without a real HC
    Env._hc = self

    # register the built-in reference genomes from the backend
    ReferenceGenome._from_config(_backend.get_reference('GRCh37'), True)
    ReferenceGenome._from_config(_backend.get_reference('GRCh38'), True)
    ReferenceGenome._from_config(_backend.get_reference('GRCm38'), True)

    if default_reference in ReferenceGenome._references:
        self._default_ref = ReferenceGenome._references[default_reference]
    else:
        # unknown name: treat it as a path/resource to load from
        self._default_ref = ReferenceGenome.read(default_reference)

    jar_version = self._jhc.version()

    if jar_version != py_version:
        raise RuntimeError(
            f"Hail version mismatch between JAR and Python library\n"
            f" JAR: {jar_version}\n"
            f" Python: {py_version}")

    if not quiet:
        sys.stderr.write('Running on Apache Spark version {}\n'.format(
            self.sc.version))
        if self._jsc.uiWebUrl().isDefined():
            sys.stderr.write('SparkUI available at {}\n'.format(
                self._jsc.uiWebUrl().get()))

        connect_logger('localhost', 12888)

        self._hail.HailContext.startProgressBar(self._jsc)

        sys.stderr.write(
            'Welcome to\n'
            ' __ __ <>__\n'
            ' / /_/ /__ __/ /\n'
            ' / __ / _ `/ / /\n'
            ' /_/ /_/\\_,_/_/_/ version {}\n'.format(py_version))

        if py_version.startswith('devel'):
            sys.stderr.write(
                'NOTE: This is a beta version. Interfaces may change\n'
                ' during the beta period. We recommend pulling\n'
                ' the latest changes weekly.\n')
        sys.stderr.write(f'LOGGING: writing to {log}\n')

    install_exception_handler()
    Env.set_seed(global_seed)