def get_select_exprs(caller, exprs, named_exprs, indices, protect_keys=True):
    """Validate the arguments of a select-style method and collect them.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in error messages.
    exprs : iterable
        Positional selections; strings are looked up on ``indices.source``,
        anything else is passed through ``to_expr``.
    named_exprs : dict
        Keyword selections mapping the new field name to its value.
    indices
        Indices every positional expression must carry.
    protect_keys : bool
        If true, each selected name is passed to ``check_keys``.

    Returns
    -------
    OrderedDict
        Field name to expression, positional selections first.

    Raises
    ------
    ExpressionException
        If a positional expression has different indices, is not a nested
        field reference, or if the same name is selected more than once.
    """
    from hail.expr.expressions import to_expr, ExpressionException

    exprs = [indices.source[e] if isinstance(e, str) else to_expr(e) for e in exprs]
    named_exprs = {k: to_expr(v) for k, v in named_exprs.items()}

    assignments = OrderedDict()
    # Names are recorded in a list as well: the keys of `assignments` are
    # unique by construction, so checking them could never find a duplicate.
    selected_names = []

    for e in exprs:
        if not e._indices == indices:
            raise ExpressionException(
                "method '{}' parameter 'exprs' expects {}-indexed fields,"
                " found indices {}".format(caller, list(indices.axes), list(e._indices.axes)))
        if not e._ast.is_nested_field:
            raise ExpressionException(
                "method '{}' expects keyword arguments for complex expressions"
                .format(caller))
        name = e._ast.name
        if protect_keys:
            check_keys(name, indices)
        selected_names.append(name)
        assignments[name] = e

    for k, e in named_exprs.items():
        if protect_keys:
            check_keys(k, indices)
        check_collisions(indices.source._fields, k, indices)
        selected_names.append(k)
        assignments[k] = e

    check_field_uniqueness(selected_names)
    return assignments
def check_collisions(caller, names, indices, override_protected_indices=None):
    """Reject names that collide with existing fields or repeat themselves.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in error messages.
    names : iterable of str
        Field names the caller is about to produce.
    indices
        Indices of the fields being produced; ``indices.source._fields`` is
        consulted for existing fields.
    override_protected_indices : container, optional
        When given, an existing field collides only if its indices are in
        this container. Otherwise it collides when its indices differ from
        `indices`.

    Raises
    ------
    ExpressionException
        On a collision with an existing field, or when a name occurs more
        than once in `names`.
    """
    from hail.expr.expressions import ExpressionException

    # `names` is traversed twice below; materialize it so that a one-shot
    # iterable does not silently skip the duplicate check.
    names = list(names)
    fields = indices.source._fields

    if override_protected_indices is not None:
        def invalid(e):
            return e._indices in override_protected_indices
    else:
        def invalid(e):
            return e._indices != indices

    # check collisions with fields on other axes
    for name in names:
        if name in fields and invalid(fields[name]):
            msg = f"{caller!r}: name collision with field indexed by {list(fields[name]._indices.axes)}: {name!r}"
            error('Analysis exception: {}'.format(msg))
            raise ExpressionException(msg)

    # check duplicate fields
    for name, count in Counter(names).items():
        if count > 1:
            raise ExpressionException(
                f"{caller!r}: selection would produce duplicate field {name!r}")
def check_collisions(fields, name, indices):
    """Raise if `name` already names a field with different indices.

    Parameters
    ----------
    fields : mapping
        Existing fields by name; each value exposes ``_indices``.
    name : str
        Name about to be assigned.
    indices
        Indices of the field being assigned.

    Raises
    ------
    ExpressionException
        If `name` is in `fields` and that field's indices are not equal to
        `indices`. The message is also reported through ``error``.
    """
    from hail.expr.expressions import ExpressionException

    if name not in fields:
        return

    existing = fields[name]
    if existing._indices == indices:
        return

    msg = 'name collision with field indexed by {}: {}'.format(
        list(existing._indices.axes), name)
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
def get_key_by_exprs(caller, exprs, named_exprs, indices, override_protected_indices=None):
    """Validate key_by-style arguments and work out the new key.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in error messages.
    exprs : iterable
        Positional key expressions; strings are looked up on
        ``indices.source``.
    named_exprs : dict
        Keyword key expressions mapping the new field name to its value.
    indices
        Indices the expressions are analyzed against.
    override_protected_indices : container, optional
        Forwarded to ``check_collisions``.

    Returns
    -------
    tuple
        ``(final_key, bindings)``: the list of key field names in order, and
        a dict of the fields that must be computed (positional expressions
        that are not existing top-level fields, plus all keyword ones).

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference, or if
        ``check_collisions`` rejects the resulting names.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze

    exprs = [indices.source[e] if isinstance(e, str) else e for e in exprs]
    named_exprs = {k: to_expr(v) for k, v in named_exprs.items()}

    top_level_fields = indices.source._fields_inverse
    final_key = []
    bindings = {}

    for e in exprs:
        analyze(caller, e, indices, broadcast=False)
        if not e._ir.is_nested_field:
            raise ExpressionException(f"{caller!r} expects keyword arguments for complex expressions\n"
                                      f" Correct: ht = ht.key_by('x')\n"
                                      f" Correct: ht = ht.key_by(ht.x)\n"
                                      f" Correct: ht = ht.key_by(x = ht.x.replace(' ', '_'))\n"
                                      f" INCORRECT: ht = ht.key_by(ht.x.replace(' ', '_'))")

        name = e._ir.name
        final_key.append(name)

        # Existing top-level fields are reused by name; only nested fields
        # need a binding so they can be created under that name.
        if e not in top_level_fields:
            bindings[name] = e

    final_key.extend(named_exprs)
    bindings.update(named_exprs)
    check_collisions(caller, final_key, indices, override_protected_indices=override_protected_indices)
    return final_key, bindings
def get_select_exprs(caller, exprs, named_exprs, indices, protect_keys=True):
    """Validate the arguments of a select-style method and collect them.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in error messages.
    exprs : iterable
        Positional selections; strings are looked up on ``indices.source``,
        anything else is passed through ``to_expr``.
    named_exprs : dict
        Keyword selections mapping the new field name to its value.
    indices
        Indices the positional expressions are analyzed against.
    protect_keys : bool
        If true, each selected name is passed to ``check_keys``.

    Returns
    -------
    OrderedDict
        Field name to expression, positional selections first.

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference, or if
        the same name is selected more than once.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze

    exprs = [indices.source[e] if isinstance(e, str) else to_expr(e) for e in exprs]
    named_exprs = {k: to_expr(v) for k, v in named_exprs.items()}

    assignments = OrderedDict()
    # Names are recorded in a list as well: the keys of `assignments` are
    # unique by construction, so checking them could never find a duplicate.
    selected_names = []

    for e in exprs:
        if not e._ir.is_nested_field:
            raise ExpressionException(
                "method '{}' expects keyword arguments for complex expressions"
                .format(caller))
        analyze(caller, e, indices, broadcast=False)
        name = e._ir.name
        if protect_keys:
            check_keys(name, indices)
        selected_names.append(name)
        assignments[name] = e

    for k, e in named_exprs.items():
        if protect_keys:
            check_keys(k, indices)
        check_collisions(indices.source._fields, k, indices)
        selected_names.append(k)
        assignments[k] = e

    check_field_uniqueness(selected_names)
    return assignments
def check_keys(caller, name, protected_key):
    """Raise if `name` is one of the protected key fields.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in the error message.
    name : str
        Field name about to be written.
    protected_key : container of str
        Key field names that must not be overwritten.

    Raises
    ------
    ExpressionException
        If `name` is in `protected_key`. The message is also reported
        through ``error``.
    """
    from hail.expr.expressions import ExpressionException

    if name not in protected_key:
        return

    msg = (
        f"{caller!r}: cannot overwrite key field {name!r} with annotate, select or drop; "
        f"use key_by to modify keys."
    )
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
def check_keys(name, indices):
    """Raise if `name` is a key field of `indices`.

    Parameters
    ----------
    name : str
        Field name about to be written.
    indices
        Object whose ``key`` attribute is either ``None`` or an iterable of
        key field names.

    Raises
    ------
    ExpressionException
        If ``indices.key`` is not ``None`` and contains `name`. The message
        is also reported through ``error``.
    """
    from hail.expr.expressions import ExpressionException

    key = indices.key
    if key is None or name not in set(key):
        return

    msg = "cannot overwrite key field {} with annotate, select or drop; use key_by to modify keys.".format(repr(name))
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
def coerce(self, x) -> Expression:
    """Convert `x` to an expression and coerce it to this coercer's type.

    Parameters
    ----------
    x
        Value passed through ``to_expr``.

    Returns
    -------
    Expression
        The result of ``self._coerce`` when ``self._requires_conversion``
        says the type needs converting, otherwise the expression unchanged.

    Raises
    ------
    ExpressionException
        If ``self.can_coerce`` rejects the expression's type.
    """
    expr = to_expr(x)
    if self.can_coerce(expr.dtype):
        return self._coerce(expr) if self._requires_conversion(expr.dtype) else expr
    raise ExpressionException(f"cannot coerce type '{expr.dtype}' to type '{self.str_t}'")
def get_annotate_exprs(caller, named_exprs, indices):
    """Convert and validate the keyword arguments of an annotate-style method.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in the error message.
    named_exprs : dict
        New field name to value; each value is passed through ``to_expr``.
    indices
        Indices of the fields being annotated.

    Returns
    -------
    dict
        Field name to converted expression.

    Raises
    ------
    ExpressionException
        If a name is a key field, or if ``check_keys`` or
        ``check_collisions`` rejects it.
    """
    from hail.expr.expressions import to_expr, ExpressionException

    # All values are converted before any name is validated.
    annotations = {name: to_expr(value) for name, value in named_exprs.items()}

    for name in annotations:
        check_keys(name, indices)
        if indices.key and name in indices.key.keys():
            raise ExpressionException("'{}' cannot overwrite key field: {}"
                                      .format(caller, repr(name)))
        check_collisions(indices.source._fields, name, indices)

    return annotations
def get_select_exprs(caller, exprs, named_exprs, indices, base_struct):
    """Build the struct produced by a select-style call.

    Parameters
    ----------
    caller : str
        Name of the calling method, used in error messages.
    exprs : iterable
        Positional selections; strings are looked up on ``indices.source``.
    named_exprs : dict
        Keyword selections mapping the new field name to its value.
    indices
        Indices the expressions are analyzed against;
        ``indices.protected_key`` lists the key fields that are always kept.
    base_struct
        Struct expression the selection is applied to.

    Returns
    -------
    Struct expression whose fields are the protected key fields followed by
    the positional and then the keyword selections, in that order.

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference, or if
        ``check_keys`` or ``check_collisions`` rejects a name.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze

    source = indices.source
    positional = [source[e] if isinstance(e, str) else e for e in exprs]
    keyword = {k: to_expr(v) for k, v in named_exprs.items()}

    kept_fields = indices.protected_key[:]   # existing fields selected by name
    protected_key = set(kept_fields)
    output_order = kept_fields[:]            # every output field, in final order
    new_fields = {}                          # fields that have to be inserted

    for expr in positional:
        if not expr._ir.is_nested_field:
            raise ExpressionException(
                f"{caller!r} expects keyword arguments for complex expressions\n"
                f" Correct: ht = ht.select('x')\n"
                f" Correct: ht = ht.select(ht.x)\n"
                f" Correct: ht = ht.select(x = ht.x.replace(' ', '_'))\n"
                f" INCORRECT: ht = ht.select(ht.x.replace(' ', '_'))")
        analyze(caller, expr, indices, broadcast=False)

        field_name = expr._ir.name
        check_keys(caller, field_name, protected_key)
        output_order.append(field_name)
        if expr in source._fields_inverse:
            kept_fields.append(field_name)
        else:
            new_fields[field_name] = expr

    for field_name, expr in keyword.items():
        check_keys(caller, field_name, protected_key)
        output_order.append(field_name)
        new_fields[field_name] = expr

    check_collisions(caller, output_order, indices)

    selected = base_struct.select(*kept_fields)
    if output_order == kept_fields + list(new_fields):
        # don't clog the IR with redundant field names
        result = selected.annotate(**new_fields)
    else:
        result = selected._annotate_ordered(new_fields, output_order)

    assert list(result) == output_order
    return result
def when_missing(self, then) -> 'SwitchBuilder':
    """Register the branch taken when the `base` expression is missing.

    Parameters
    ----------
    then : :class:`.Expression`
        Value returned if the `base` expression is missing.

    Returns
    -------
    :class:`.SwitchBuilder`
        Mutates and returns `self`.

    Raises
    ------
    ExpressionException
        If a missing branch has already been registered.
    """
    already_registered = self._when_missing_case is not None
    if already_registered:
        raise ExpressionException("'when_missing' can only be called once")

    # Unify the branch type with the builder before recording the branch.
    self._unify_type(then.dtype)
    self._when_missing_case = then
    return self
def or_missing(self):
    """Finish the case statement, yielding missing as the fallback.

    Notes
    -----
    If no condition from a :meth:`.CaseBuilder.when` call is ``True``, then
    the result is missing.

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    ExpressionException
        If no case has been added yet.
    """
    if not self._cases:
        raise ExpressionException("'or_missing' cannot be called without at least one 'when' call")

    from hail.expr.functions import null
    fallback = null(self._ret_type)
    return self._finish(fallback)
def write_from_entry_expr(entry_expr, path, block_size=None):
    """Writes a block matrix from a matrix table entry expression.

    Notes
    -----
    The resulting file can be loaded with :meth:`BlockMatrix.read`.

    If `entry_expr` is already a field of its source matrix table, that
    field is written directly; otherwise the expression is first selected
    into a temporary entry field.

    Parameters
    ----------
    entry_expr: :class:`.Float64Expression`
        Entry expression for numeric matrix entries.
    path: :obj:`str`
        Path for output.
    block_size: :obj:`int`, optional
        Block size. A falsy value (``None`` or ``0``) selects
        :meth:`.BlockMatrix.default_block_size`.

    Raises
    ------
    ValueError
        If the source of `entry_expr` is not a ``MatrixTable``.
    ExpressionException
        If `entry_expr` is not entry-indexed.
    """
    if not block_size:
        block_size = BlockMatrix.default_block_size()

    source = entry_expr._indices.source
    if not isinstance(source, MatrixTable):
        raise ValueError(
            "Expect an expression of 'MatrixTable', found {}".format(
                "expression of '{}'".format(source.__class__) if source is not None else 'scalar expression'))

    if entry_expr._indices != source._entry_indices:
        from hail.expr.expressions import ExpressionException
        # The message names this method rather than `from_entry_expr`.
        raise ExpressionException(
            "write_from_entry_expr: 'entry_expr' must be entry-indexed,"
            " found indices {}".format(list(entry_expr._indices.axes)))

    if entry_expr in source._fields_inverse:
        source._jvds.writeBlockMatrix(path, source._fields_inverse[entry_expr], block_size)
    else:
        # Not an existing field: write it under a temporary name.
        uid = Env.get_uid()
        source.select_entries(**{uid: entry_expr})._jvds.writeBlockMatrix(path, uid, block_size)
def when_missing(self, then) -> 'SwitchBuilder':
    """Register the branch taken when the `base` expression is missing.

    Parameters
    ----------
    then : :class:`.Expression`
        Value returned if the `base` expression is missing.

    Returns
    -------
    :class:`.SwitchBuilder`
        Mutates and returns `self`.

    Raises
    ------
    ExpressionException
        If ``self._has_missing_branch`` is already true.
    """
    # NOTE(review): this method never sets `_has_missing_branch`; confirm it
    # is maintained elsewhere, otherwise this guard cannot trigger.
    if self._has_missing_branch:
        raise ExpressionException("'when_missing' can only be called once")

    self._unify_type(then.dtype)

    from hail.expr.functions import is_missing
    # need to insert at 0, because upstream missingness would propagate
    missing_case = (is_missing(self._base), then)
    self._cases.insert(0, missing_case)
    return self
def or_error(self, message):
    """Finish the case statement, raising an error as the fallback.

    Notes
    -----
    If no condition from a :meth:`.CaseBuilder.when` call is ``True``, then
    an error is thrown with the given message.

    Parameters
    ----------
    message : :class:`.Expression` of type :obj:`.tstr`

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    ExpressionException
        If no case has been added yet.
    """
    if not self._cases:
        raise ExpressionException(
            "'or_error' cannot be called without at least one 'when' call")

    result_type = self._ret_type
    die = ir.Die(message._ir, result_type)
    return self._finish(construct_expr(die, result_type))
def or_missing(self):
    """Finish the switch statement, yielding missing as the fallback.

    Notes
    -----
    If no value from a :meth:`~.SwitchBuilder.when` call is matched, then
    the result is missing.

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    ExpressionException
        If no case has been added yet.
    """
    if not self._cases:
        raise ExpressionException(
            "'or_missing' cannot be called without at least one 'when' call"
        )

    from hail.expr.functions import missing
    fallback = missing(self._ret_type)
    return self._finish(fallback)
def check_field_uniqueness(fields):
    """Raise if any field name occurs more than once.

    Parameters
    ----------
    fields : iterable of hashable
        Field names a selection would produce.

    Raises
    ------
    ExpressionException
        For the first name found with a count above one.
    """
    for name, count in Counter(fields).items():
        if count > 1:
            # Imported only on the failure path, as in the original.
            from hail.expr.expressions import ExpressionException
            # `!r` already quotes the name; extra quotes would give ''name''.
            raise ExpressionException(
                "selection would produce duplicate field {!r}".format(name))