Example #1

@typecheck(sc=nullable(SparkContext),
           app_name=str,
           master=nullable(str),
           local=str,
           log=nullable(str),
           quiet=bool,
           append=bool,
           min_block_size=int,
           branching_factor=int,
           tmp_dir=nullable(str),
           default_reference=enumeration(*BUILTIN_REFERENCES),
           idempotent=bool,
           global_seed=nullable(int),
           spark_conf=nullable(dictof(str, str)),
           skip_logging_configuration=bool,
           local_tmpdir=nullable(str),
           _optimizer_iterations=nullable(int))
def init(sc=None,
         app_name='Hail',
         master=None,
         local='local[*]',
         log=None,
         quiet=False,
         append=False,
         min_block_size=0,
         branching_factor=50,
         tmp_dir=None,
         default_reference='GRCh37',
         idempotent=False,
         global_seed=6348563392232659379,
         spark_conf=None,
         skip_logging_configuration=False,
         local_tmpdir=None,
         _optimizer_iterations=None):
    # Remaining parameters reconstructed from the @typecheck decorator above;
    # the function body is omitted from this snippet.
    ...
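
A minimal usage sketch of the init entry point above; the argument values are illustrative, not taken from the snippet:

import hail as hl

# Start a local Hail session; anything not passed falls back to the
# defaults shown in the signature above.
hl.init(app_name='my-analysis',
        default_reference='GRCh38',
        quiet=True)

# ... run pipelines ...

hl.stop()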
Example #2
class HailContext(object):
    @typecheck_method(sc=nullable(SparkContext),
                      app_name=str,
                      master=nullable(str),
                      local=str,
                      log=nullable(str),
                      quiet=bool,
                      append=bool,
                      min_block_size=int,
                      branching_factor=int,
                      tmp_dir=nullable(str),
                      default_reference=str,
                      idempotent=bool,
                      global_seed=nullable(int),
                      spark_conf=nullable(dictof(str, str)),
                      optimizer_iterations=nullable(int),
                      _backend=nullable(Backend))
    def __init__(self,
                 sc=None,
                 app_name="Hail",
                 master=None,
                 local='local[*]',
                 log=None,
                 quiet=False,
                 append=False,
                 min_block_size=1,
                 branching_factor=50,
                 tmp_dir=None,
                 default_reference="GRCh37",
                 idempotent=False,
                 global_seed=6348563392232659379,
                 spark_conf=None,
                 optimizer_iterations=None,
                 _backend=None):

        if Env._hc:
            if idempotent:
                return
            else:
                raise FatalError(
                    'Hail has already been initialized, restart session '
                    'or stop Hail to change configuration.')

        if pkg_resources.resource_exists(__name__, "hail-all-spark.jar"):
            hail_jar_path = pkg_resources.resource_filename(
                __name__, "hail-all-spark.jar")
            assert os.path.exists(
                hail_jar_path), f'{hail_jar_path} does not exist'
            conf = SparkConf()

            base_conf = spark_conf or {}
            for k, v in base_conf.items():
                conf.set(k, v)

            jars = [hail_jar_path]

            if os.environ.get('HAIL_SPARK_MONITOR'):
                import sparkmonitor
                jars.append(
                    os.path.join(os.path.dirname(sparkmonitor.__file__),
                                 'listener.jar'))
                conf.set("spark.extraListeners",
                         "sparkmonitor.listener.JupyterSparkMonitorListener")

            conf.set('spark.jars', ','.join(jars))
            conf.set('spark.driver.extraClassPath', ','.join(jars))
            conf.set('spark.executor.extraClassPath', './hail-all-spark.jar')
            if sc is None:
                SparkContext._ensure_initialized(conf=conf)
            else:
                import warnings
                warnings.warn(
                    'pip-installed Hail requires additional configuration options in Spark referring\n'
                    '  to the path to the Hail Python module directory HAIL_DIR,\n'
                    '  e.g. /path/to/python/site-packages/hail:\n'
                    '    spark.jars=HAIL_DIR/hail-all-spark.jar\n'
                    '    spark.driver.extraClassPath=HAIL_DIR/hail-all-spark.jar\n'
                    '    spark.executor.extraClassPath=./hail-all-spark.jar')
        else:
            SparkContext._ensure_initialized()

        self._gateway = SparkContext._gateway
        self._jvm = SparkContext._jvm

        # hail package
        self._hail = getattr(self._jvm, 'is').hail

        self._warn_cols_order = True
        self._warn_entries_order = True

        Env._jvm = self._jvm
        Env._gateway = self._gateway

        jsc = sc._jsc.sc() if sc else None

        if _backend is None:
            if os.environ.get('HAIL_APISERVER_URL') is not None:
                _backend = ServiceBackend()
            else:
                _backend = SparkBackend()
        self._backend = _backend

        tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')
        optimizer_iterations = get_env_or_default(optimizer_iterations,
                                                  'HAIL_OPTIMIZER_ITERATIONS',
                                                  3)

        py_version = version()

        if log is None:
            log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                            suffix=f'-{py_version}.log')
        self._log = log

        # we always pass 'quiet' to the JVM because stderr output needs
        # to be routed through Python separately.
        if idempotent:
            self._jhc = self._hail.HailContext.getOrCreate(
                jsc, app_name, joption(master), local, log, True, append,
                min_block_size, branching_factor, tmp_dir,
                optimizer_iterations)
        else:
            self._jhc = self._hail.HailContext.apply(jsc, app_name,
                                                     joption(master), local,
                                                     log, True, append,
                                                     min_block_size,
                                                     branching_factor, tmp_dir,
                                                     optimizer_iterations)

        self._jsc = self._jhc.sc()
        self.sc = sc if sc else SparkContext(
            gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
        self._jspark_session = self._jhc.sparkSession()
        self._spark_session = SparkSession(self.sc, self._jhc.sparkSession())

        super(HailContext, self).__init__()

        # do this at the end in case something errors, so we don't raise the above error without a real HC
        Env._hc = self

        ReferenceGenome._from_config(_backend.get_reference('GRCh37'), True)
        ReferenceGenome._from_config(_backend.get_reference('GRCh38'), True)
        ReferenceGenome._from_config(_backend.get_reference('GRCm38'), True)

        if default_reference in ReferenceGenome._references:
            self._default_ref = ReferenceGenome._references[default_reference]
        else:
            self._default_ref = ReferenceGenome.read(default_reference)

        jar_version = self._jhc.version()

        if jar_version != py_version:
            raise RuntimeError(
                f"Hail version mismatch between JAR and Python library\n"
                f"  JAR:    {jar_version}\n"
                f"  Python: {py_version}")

        if not quiet:
            sys.stderr.write('Running on Apache Spark version {}\n'.format(
                self.sc.version))
            if self._jsc.uiWebUrl().isDefined():
                sys.stderr.write('SparkUI available at {}\n'.format(
                    self._jsc.uiWebUrl().get()))

            connect_logger('localhost', 12888)

            self._hail.HailContext.startProgressBar(self._jsc)

            sys.stderr.write(
                'Welcome to\n'
                '     __  __     <>__\n'
                '    / /_/ /__  __/ /\n'
                '   / __  / _ `/ / /\n'
                '  /_/ /_/\\_,_/_/_/   version {}\n'.format(py_version))

            if py_version.startswith('devel'):
                sys.stderr.write(
                    'NOTE: This is a beta version. Interfaces may change\n'
                    '  during the beta period. We recommend pulling\n'
                    '  the latest changes weekly.\n')
            sys.stderr.write(f'LOGGING: writing to {log}\n')

        install_exception_handler()
        Env.set_seed(global_seed)

    @property
    def default_reference(self):
        return self._default_ref

    def stop(self):
        Env.hail().HailContext.clear()
        self.sc.stop()
        self.sc = None
        Env._jvm = None
        Env._gateway = None
        Env._hc = None
        uninstall_exception_handler()
        Env._dummy_table = None
        Env._seed_generator = None
        hail.ir.clear_session_functions()
        ReferenceGenome._references = {}
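
When Hail runs from the pip-installed jar, every entry of spark_conf is copied onto the SparkConf before Hail appends its own spark.jars and extraClassPath settings; a short sketch of passing Spark properties through it (the property values are illustrative):

import hail as hl

# Each key/value pair ends up in conf.set(k, v) inside HailContext.__init__;
# Hail still adds its jar and classpath entries afterwards.
hl.init(spark_conf={
    'spark.executor.memory': '4g',
    'spark.driver.maxResultSize': '2g',
})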
Example #3
class ReferenceGenome(object):
    """An object that represents a `reference genome <https://en.wikipedia.org/wiki/Reference_genome>`__.

    Examples
    --------

    >>> contigs = ["1", "X", "Y", "MT"]
    >>> lengths = {"1": 249250621, "X": 155270560, "Y": 59373566, "MT": 16569}
    >>> par = [("X", 60001, 2699521)]
    >>> my_ref = hl.ReferenceGenome("my_ref", contigs, lengths, "X", "Y", "MT", par)

    Notes
    -----
    Hail comes with predefined reference genomes (case sensitive!):

     - GRCh37, Genome Reference Consortium Human Build 37
     - GRCh38, Genome Reference Consortium Human Build 38
     - GRCm38, Genome Reference Consortium Mouse Build 38
     - CanFam3, Canis lupus familiaris (dog)

    You can access these reference genome objects using :func:`~hail.get_reference`:

    >>> rg = hl.get_reference('GRCh37')
    >>> rg = hl.get_reference('GRCh38')
    >>> rg = hl.get_reference('GRCm38')
    >>> rg = hl.get_reference('CanFam3')

    Note that constructing a new reference genome, either with the class
    constructor or with `read`, adds the reference genome to the list of
    known references; it can be accessed with :func:`~hail.get_reference`
    at any time afterwards.

    Note
    ----
    Reference genome names must be unique. It is not possible to overwrite the
    built-in reference genomes.

    Parameters
    ----------
    name : :class:`str`
        Name of reference. Must be unique and NOT one of Hail's
        predefined references: ``'GRCh37'``, ``'GRCh38'``, ``'GRCm38'``,
        ``'CanFam3'`` and ``'default'``.
    contigs : :obj:`list` of :class:`str`
        Contig names.
    lengths : :obj:`dict` of :class:`str` to :obj:`int`
        Dict of contig names to contig lengths.
    x_contigs : :class:`str` or :obj:`list` of :obj:`str`
        Contigs to be treated as X chromosomes.
    y_contigs : :class:`str` or :obj:`list` of :obj:`str`
        Contigs to be treated as Y chromosomes.
    mt_contigs : :class:`str` or :obj:`list` of :obj:`str`
        Contigs to be treated as mitochondrial DNA.
    par : :obj:`list` of :obj:`tuple` of (str, int, int)
        List of tuples with (contig, start, end).
    """

    _references = {}

    @classmethod
    def _from_config(cls, config, _builtin=False):
        def par_tuple(p):
            assert p['start']['contig'] == p['end']['contig']
            return (p['start']['contig'], p['start']['position'],
                    p['end']['position'])

        contigs = config['contigs']
        return ReferenceGenome(config['name'], [c['name'] for c in contigs],
                               {c['name']: c['length']
                                for c in contigs}, config['xContigs'],
                               config['yContigs'], config['mtContigs'],
                               [par_tuple(p) for p in config['par']], _builtin)

    @typecheck_method(name=str,
                      contigs=sequenceof(str),
                      lengths=dictof(str, int),
                      x_contigs=oneof(str, sequenceof(str)),
                      y_contigs=oneof(str, sequenceof(str)),
                      mt_contigs=oneof(str, sequenceof(str)),
                      par=sequenceof(sized_tupleof(str, int, int)),
                      _builtin=bool)
    def __init__(self,
                 name,
                 contigs,
                 lengths,
                 x_contigs=[],
                 y_contigs=[],
                 mt_contigs=[],
                 par=[],
                 _builtin=False):
        super(ReferenceGenome, self).__init__()

        contigs = wrap_to_list(contigs)
        x_contigs = wrap_to_list(x_contigs)
        y_contigs = wrap_to_list(y_contigs)
        mt_contigs = wrap_to_list(mt_contigs)

        self._config = {
            'name': name,
            'contigs': [{'name': c, 'length': l} for c, l in lengths.items()],
            'xContigs': x_contigs,
            'yContigs': y_contigs,
            'mtContigs': mt_contigs,
            'par': [{'start': {'contig': c, 'position': s},
                     'end': {'contig': c, 'position': e}}
                    for (c, s, e) in par]
        }

        self._contigs = contigs
        self._lengths = lengths
        self._par_tuple = par
        self._par = [
            hl.Interval(hl.Locus(c, s, self), hl.Locus(c, e, self))
            for (c, s, e) in par
        ]
        self._global_positions = None

        ReferenceGenome._references[name] = self

        if not _builtin:
            Env.backend().add_reference(self._config)

        self._sequence_files = None
        self._liftovers = dict()

    def __str__(self):
        return self._config['name']

    def __repr__(self):
        return 'ReferenceGenome(name=%s, contigs=%s, lengths=%s, x_contigs=%s, y_contigs=%s, mt_contigs=%s, par=%s)' % \
               (self.name, self.contigs, self.lengths, self.x_contigs, self.y_contigs, self.mt_contigs, self._par_tuple)

    def __eq__(self, other):
        return isinstance(other,
                          ReferenceGenome) and self._config == other._config

    def __hash__(self):
        return hash(self.name)

    @property
    def name(self):
        """Name of reference genome.

        Returns
        -------
        :class:`str`
        """
        return self._config['name']

    @property
    def contigs(self):
        """Contig names.

        Returns
        -------
        :obj:`list` of :class:`str`
        """
        return self._contigs

    @property
    def lengths(self):
        """Dict of contig name to contig length.

        Returns
        -------
        :obj:`dict` of :class:`str` to :obj:`int`
        """
        return self._lengths

    @property
    def x_contigs(self):
        """X contigs.

        Returns
        -------
        :obj:`list` of :class:`str`
        """
        return self._config['xContigs']

    @property
    def y_contigs(self):
        """Y contigs.

        Returns
        -------
        :obj:`list` of :class:`str`
        """
        return self._config['yContigs']

    @property
    def mt_contigs(self):
        """Mitochondrial contigs.

        Returns
        -------
        :obj:`list` of :class:`str`
        """
        return self._config['mtContigs']

    @property
    def par(self):
        """Pseudoautosomal regions.

        Returns
        -------
        :obj:`list` of :class:`.Interval`
        """

        return self._par

    @typecheck_method(contig=str)
    def contig_length(self, contig):
        """Contig length.

        Parameters
        ----------
        contig : :class:`str`
            Contig name.

        Returns
        -------
        :obj:`int`
            Length of contig.
        """
        if contig in self.lengths:
            return self.lengths[contig]
        else:
            raise KeyError(
                "Contig `{}' is not in reference genome.".format(contig))

    @typecheck_method(contig=str)
    def _contig_global_position(self, contig):
        if self._global_positions is None:
            gp = {}
            lengths = self._lengths
            x = 0
            for c in self.contigs:
                gp[c] = x
                x += lengths[c]
            self._global_positions = gp
        return self._global_positions[contig]

    @classmethod
    @typecheck_method(path=str)
    def read(cls, path):
        """Load reference genome from a JSON file.

        Notes
        -----

        The JSON file must have the following format:

        .. code-block:: text

            {"name": "my_reference_genome",
             "contigs": [{"name": "1", "length": 10000000},
                         {"name": "2", "length": 20000000},
                         {"name": "X", "length": 19856300},
                         {"name": "Y", "length": 78140000},
                         {"name": "MT", "length": 532}],
             "xContigs": ["X"],
             "yContigs": ["Y"],
             "mtContigs": ["MT"],
             "par": [{"start": {"contig": "X","position": 60001},"end": {"contig": "X","position": 2699521}},
                     {"start": {"contig": "Y","position": 10001},"end": {"contig": "Y","position": 2649521}}]
            }


        `name` must be unique and not overlap with Hail's pre-instantiated
        references: ``'GRCh37'``, ``'GRCh38'``, ``'GRCm38'``, ``'CanFam3'``, and
        ``'default'``.
        The contig names in `xContigs`, `yContigs`, and `mtContigs` must be
        present in `contigs`. The intervals listed in `par` must have contigs in
        either `xContigs` or `yContigs` and must have positions between 0 and
        the contig length given in `contigs`.

        Parameters
        ----------
        path : :class:`str`
            Path to JSON file.

        Returns
        -------
        :class:`.ReferenceGenome`
        """
        with hl.hadoop_open(path) as f:
            return ReferenceGenome._from_config(json.load(f))

    @typecheck_method(output=str)
    def write(self, output):
        """"Write this reference genome to a file in JSON format.

        Examples
        --------

        >>> my_rg = hl.ReferenceGenome("new_reference", ["x", "y", "z"], {"x": 500, "y": 300, "z": 200})
        >>> my_rg.write("output/new_reference.json")

        Notes
        -----

        Use :meth:`~hail.genetics.ReferenceGenome.read` to reimport the exported
        reference genome in a new HailContext session.

        Parameters
        ----------
        output : :class:`str`
            Path of JSON file to write.
        """
        with hl.utils.hadoop_open(output, 'w') as f:
            json.dump(self._config, f)

    @typecheck_method(fasta_file=str, index_file=nullable(str))
    def add_sequence(self, fasta_file, index_file=None):
        """Load the reference sequence from a FASTA file.

        Examples
        --------
        Access the GRCh37 reference genome using :func:`~hail.get_reference`:

        >>> rg = hl.get_reference('GRCh37') # doctest: +SKIP

        Add a sequence file:

        >>> rg.add_sequence('gs://hail-common/references/human_g1k_v37.fasta.gz',
        ...                 'gs://hail-common/references/human_g1k_v37.fasta.fai') # doctest: +SKIP

        Add a sequence file with the default index location:

        >>> rg.add_sequence('gs://hail-common/references/human_g1k_v37.fasta.gz') # doctest: +SKIP


        Notes
        -----
        This method can only be run once per reference genome. Use
        :meth:`~has_sequence` to test whether a sequence is loaded.

        FASTA and index files are hosted on Google Cloud Storage for some of
        Hail's built-in references:

        **GRCh37**

        - FASTA file: ``gs://hail-common/references/human_g1k_v37.fasta.gz``
        - Index file: ``gs://hail-common/references/human_g1k_v37.fasta.fai``

        **GRCh38**

        - FASTA file: ``gs://hail-common/references/Homo_sapiens_assembly38.fasta.gz``
        - Index file: ``gs://hail-common/references/Homo_sapiens_assembly38.fasta.fai``

        Public download links are available
        `here <https://console.cloud.google.com/storage/browser/hail-common/references/>`__.

        Parameters
        ----------
        fasta_file : :class:`str`
            Path to FASTA file. Can be compressed (GZIP) or uncompressed.
        index_file : :obj:`None` or :class:`str`
            Path to FASTA index file. Must be uncompressed. If `None`, the
            index path is inferred by replacing the FASTA file's extension
            with `.fai`.
        """
        if index_file is None:
            index_file = re.sub(r'\.[^.]*$', '.fai', fasta_file)
        Env.backend().add_sequence(self.name, fasta_file, index_file)
        self._sequence_files = (fasta_file, index_file)

    def has_sequence(self):
        """True if the reference sequence has been loaded.

        Returns
        -------
        :obj:`bool`
        """
        return self._sequence_files is not None

    def remove_sequence(self):
        """Remove the reference sequence."""
        self._sequence_files = None
        Env.backend().remove_sequence(self.name)

    @classmethod
    @typecheck_method(name=str,
                      fasta_file=str,
                      index_file=str,
                      x_contigs=oneof(str, sequenceof(str)),
                      y_contigs=oneof(str, sequenceof(str)),
                      mt_contigs=oneof(str, sequenceof(str)),
                      par=sequenceof(sized_tupleof(str, int, int)))
    def from_fasta_file(cls,
                        name,
                        fasta_file,
                        index_file,
                        x_contigs=[],
                        y_contigs=[],
                        mt_contigs=[],
                        par=[]):
        """Create reference genome from a FASTA file.

        Parameters
        ----------
        name: :class:`str`
            Name for new reference genome.
        fasta_file : :class:`str`
            Path to FASTA file. Can be compressed (GZIP) or uncompressed.
        index_file : :class:`str`
            Path to FASTA index file. Must be uncompressed.
        x_contigs : :class:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as X chromosomes.
        y_contigs : :class:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as Y chromosomes.
        mt_contigs : :class:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as mitochondrial DNA.
        par : :obj:`list` of :obj:`tuple` of (str, int, int)
            List of tuples with (contig, start, end).

        Returns
        -------
        :class:`.ReferenceGenome`
        """
        par_strings = [
            "{}:{}-{}".format(contig, start, end)
            for (contig, start, end) in par
        ]
        Env.backend().from_fasta_file(name, fasta_file, index_file, x_contigs,
                                      y_contigs, mt_contigs, par_strings)

        rg = ReferenceGenome._from_config(Env.backend().get_reference(name),
                                          _builtin=True)
        rg._sequence_files = (fasta_file, index_file)
        return rg

    @typecheck_method(dest_reference_genome=reference_genome_type)
    def has_liftover(self, dest_reference_genome):
        """``True`` if a liftover chain file is available from this reference
        genome to the destination reference.

        Parameters
        ----------
        dest_reference_genome : :class:`str` or :class:`.ReferenceGenome`

        Returns
        -------
        :obj:`bool`
        """
        return dest_reference_genome.name in self._liftovers

    @typecheck_method(dest_reference_genome=reference_genome_type)
    def remove_liftover(self, dest_reference_genome):
        """Remove liftover to `dest_reference_genome`.

        Parameters
        ----------
        dest_reference_genome : :class:`str` or :class:`.ReferenceGenome`
        """
        if dest_reference_genome.name in self._liftovers:
            del self._liftovers[dest_reference_genome.name]
            Env.backend().remove_liftover(self.name,
                                          dest_reference_genome.name)

    @typecheck_method(chain_file=str,
                      dest_reference_genome=reference_genome_type)
    def add_liftover(self, chain_file, dest_reference_genome):
        """Register a chain file for liftover.

        Examples
        --------
        Access GRCh37 and GRCh38 using :func:`~hail.get_reference`:

        >>> rg37 = hl.get_reference('GRCh37') # doctest: +SKIP
        >>> rg38 = hl.get_reference('GRCh38') # doctest: +SKIP

        Add a chain file from 37 to 38:

        >>> rg37.add_liftover('gs://hail-common/references/grch37_to_grch38.over.chain.gz', rg38) # doctest: +SKIP

        Notes
        -----
        This method can only be run once per reference genome. Use
        :meth:`~has_liftover` to test whether a chain file has been registered.

        The chain file format is described
        `here <https://genome.ucsc.edu/goldenpath/help/chain.html>`__.

        Chain files are hosted on Google Cloud Storage for some of Hail's
        built-in references:

        **GRCh37 to GRCh38**
        gs://hail-common/references/grch37_to_grch38.over.chain.gz

        **GRCh38 to GRCh37**
        gs://hail-common/references/grch38_to_grch37.over.chain.gz

        Public download links are available
        `here <https://console.cloud.google.com/storage/browser/hail-common/references/>`__.

        Parameters
        ----------
        chain_file : :class:`str`
            Path to chain file. Can be compressed (GZIP) or uncompressed.
        dest_reference_genome : :class:`str` or :class:`.ReferenceGenome`
            Reference genome to convert to.
        """

        # Refuse a duplicate registration before touching the backend.
        if dest_reference_genome.name in self._liftovers:
            raise KeyError(
                f"Liftover already exists from {self.name} to {dest_reference_genome.name}."
            )
        Env.backend().add_liftover(self.name, chain_file,
                                   dest_reference_genome.name)
        self._liftovers[dest_reference_genome.name] = chain_file
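
A short round-trip sketch tying the pieces of this class together, assuming a Hail session is already running (the output path is illustrative):

import hail as hl

contigs = ["1", "X", "Y", "MT"]
lengths = {"1": 249250621, "X": 155270560, "Y": 59373566, "MT": 16569}
par = [("X", 60001, 2699521)]

# Constructing the genome registers it, so get_reference can find it afterwards.
my_ref = hl.ReferenceGenome("my_ref", contigs, lengths,
                            x_contigs="X", y_contigs="Y", mt_contigs="MT", par=par)
assert hl.get_reference("my_ref") == my_ref
assert my_ref.contig_length("1") == 249250621

# Persist the configuration; because reference names must be unique, read()
# is intended for re-importing the genome in a fresh session.
my_ref.write("output/my_ref.json")
# restored = hl.ReferenceGenome.read("output/my_ref.json")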
Example #4
class HailContext(object):
    @typecheck_method(sc=nullable(SparkContext),
                      app_name=str,
                      master=nullable(str),
                      local=str,
                      log=nullable(str),
                      quiet=bool,
                      append=bool,
                      min_block_size=int,
                      branching_factor=int,
                      tmp_dir=nullable(str),
                      default_reference=str,
                      idempotent=bool,
                      global_seed=nullable(int),
                      spark_conf=nullable(dictof(str, str)),
                      optimizer_iterations=nullable(int),
                      _backend=nullable(Backend))
    def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
                 log=None, quiet=False, append=False,
                 min_block_size=1, branching_factor=50, tmp_dir=None,
                 default_reference="GRCh37", idempotent=False,
                 global_seed=6348563392232659379, spark_conf=None,
                 optimizer_iterations=None, _backend=None):

        if Env._hc:
            if idempotent:
                return
            else:
                warn('Hail has already been initialized. If this call was intended to change configuration,'
                     ' close the session with hl.stop() first.')

        py_version = version()
        
        if log is None:
            log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                            suffix=f'-{py_version}.log')
        self._log = log

        tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')
        self.tmp_dir = tmp_dir

        optimizer_iterations = get_env_or_default(optimizer_iterations, 'HAIL_OPTIMIZER_ITERATIONS', 3)

        if _backend is None:
            if os.environ.get('HAIL_APISERVER_URL') is not None:
                _backend = ServiceBackend()
            else:
                _backend = SparkBackend(
                    idempotent, sc, spark_conf, app_name, master, local, log,
                    quiet, append, min_block_size, branching_factor, tmp_dir,
                    optimizer_iterations)
        self._backend = _backend

        self._warn_cols_order = True
        self._warn_entries_order = True

        super(HailContext, self).__init__()

        # do this at the end in case something errors, so we don't raise the above error without a real HC
        Env._hc = self

        ReferenceGenome._from_config(_backend.get_reference('GRCh37'), True)
        ReferenceGenome._from_config(_backend.get_reference('GRCh38'), True)
        ReferenceGenome._from_config(_backend.get_reference('GRCm38'), True)
        ReferenceGenome._from_config(_backend.get_reference('CanFam3'), True)

        if default_reference in ReferenceGenome._references:
            self._default_ref = ReferenceGenome._references[default_reference]
        else:
            self._default_ref = ReferenceGenome.read(default_reference)

        if not quiet:
            sys.stderr.write(
                'Welcome to\n'
                '     __  __     <>__\n'
                '    / /_/ /__  __/ /\n'
                '   / __  / _ `/ / /\n'
                '  /_/ /_/\\_,_/_/_/   version {}\n'.format(py_version))

            if py_version.startswith('devel'):
                sys.stderr.write('NOTE: This is a beta version. Interfaces may change\n'
                                 '  during the beta period. We recommend pulling\n'
                                 '  the latest changes weekly.\n')
            sys.stderr.write(f'LOGGING: writing to {log}\n')

        install_exception_handler()
        Env.set_seed(global_seed)


    @property
    def default_reference(self):
        return self._default_ref

    def stop(self):
        self._backend.stop()
        self._backend = None
        Env._jvm = None
        Env._hc = None
        uninstall_exception_handler()
        Env._dummy_table = None
        Env._seed_generator = None
        hail.ir.clear_session_functions()
        ReferenceGenome._references = {}
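
Both HailContext versions resolve tmp_dir and optimizer_iterations through get_env_or_default, which is not shown in these snippets. A plausible sketch of the behaviour implied here (explicit argument first, then the environment variable, then a fallback), offered as an assumption rather than the Hail source:

import os

def get_env_or_default(value, env_var, default):
    # Prefer an explicit argument, then the environment variable, then the default.
    if value is not None:
        return value
    return os.environ.get(env_var, default)

# e.g. get_env_or_default(None, 'TMPDIR', '/tmp') yields $TMPDIR when set and
# '/tmp' otherwise; the real helper may also coerce types such as
# HAIL_OPTIMIZER_ITERATIONS to int.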