def _impute_type(x, partial_type): from hail.genetics import Locus, Call from hail.utils import Interval, Struct def refine(t, refined): if t is None: return refined if not isinstance(t, type(refined)): raise ExpressionException( "Incompatible partial_type, {}, for value {}".format( partial_type, x)) return t if isinstance(x, Expression): return x.dtype elif isinstance(x, bool): return tbool elif isinstance(x, int): if hl.tint32.min_value <= x <= hl.tint32.max_value: return tint32 elif hl.tint64.min_value <= x <= hl.tint64.max_value: return tint64 else: raise ValueError( "Hail has no integer data type large enough to store {}". format(x)) elif isinstance(x, float): return tfloat64 elif isinstance(x, str): return tstr elif isinstance(x, Locus): return tlocus(x.reference_genome) elif isinstance(x, Interval): return tinterval(x.point_type) elif isinstance(x, Call): return tcall elif isinstance(x, Struct) or isinstance(x, dict) and isinstance( partial_type, tstruct): partial_type = refine(partial_type, hl.tstruct()) t = tstruct(**{k: _impute_type(x[k], partial_type.get(k)) for k in x}) return t elif isinstance(x, tuple): partial_type = refine(partial_type, hl.ttuple()) return ttuple(*[ _impute_type( element, partial_type[index] if index < len(partial_type) else None) for index, element in enumerate(x) ]) elif isinstance(x, list): partial_type = refine(partial_type, hl.tarray(None)) if len(x) == 0: return partial_type ts = { _impute_type(element, partial_type.element_type) for element in x } unified_type = super_unify_types(*ts) if unified_type is None: raise ExpressionException( "Hail does not support heterogeneous arrays: " "found list with elements of types {} ".format(list(ts))) return tarray(unified_type) elif is_setlike(x): partial_type = refine(partial_type, hl.tset(None)) if len(x) == 0: return partial_type ts = { _impute_type(element, partial_type.element_type) for element in x } unified_type = super_unify_types(*ts) if not unified_type: raise ExpressionException( "Hail does not support heterogeneous sets: " "found set with elements of types {} ".format(list(ts))) return tset(unified_type) elif isinstance(x, Mapping): user_partial_type = partial_type partial_type = refine(partial_type, hl.tdict(None, None)) if len(x) == 0: return partial_type kts = { _impute_type(element, partial_type.key_type) for element in x.keys() } vts = { _impute_type(element, partial_type.value_type) for element in x.values() } unified_key_type = super_unify_types(*kts) unified_value_type = super_unify_types(*vts) if not unified_key_type: raise ExpressionException( "Hail does not support heterogeneous dicts: " "found dict with keys {} of types {} ".format( list(x.keys()), list(kts))) if not unified_value_type: if unified_key_type == hl.tstr and user_partial_type is None: return tstruct(**{k: _impute_type(x[k], None) for k in x}) raise ExpressionException( "Hail does not support heterogeneous dicts: " "found dict with values of types {} ".format(list(vts))) return tdict(unified_key_type, unified_value_type) elif isinstance(x, np.generic): return from_numpy(x.dtype) elif isinstance(x, np.ndarray): element_type = from_numpy(x.dtype) return tndarray(element_type, x.ndim) elif x is None or pd.isna(x): return partial_type elif isinstance( x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)): raise ExpressionException( "'switch' and 'case' expressions must end with a call to either" "'default' or 'or_missing'") else: raise ExpressionException( "Hail cannot automatically impute type of {}: {}".format( type(x), x))
def impute_type(x): from hail.genetics import Locus, Call from hail.utils import Interval, Struct if isinstance(x, Expression): return x.dtype elif isinstance(x, bool): return tbool elif isinstance(x, int): if hl.tint32.min_value <= x <= hl.tint32.max_value: return tint32 elif hl.tint64.min_value <= x <= hl.tint64.max_value: return tint64 else: raise ValueError( "Hail has no integer data type large enough to store {}". format(x)) elif isinstance(x, float): return tfloat64 elif isinstance(x, str): return tstr elif isinstance(x, Locus): return tlocus(x.reference_genome) elif isinstance(x, Interval): return tinterval(x.point_type) elif isinstance(x, Call): return tcall elif isinstance(x, Struct): return tstruct(**{k: impute_type(x[k]) for k in x}) elif isinstance(x, tuple): return ttuple(*(impute_type(element) for element in x)) elif isinstance(x, list): if len(x) == 0: raise ExpressionException( "Cannot impute type of empty list. Use 'hl.empty_array' to create an empty array." ) ts = {impute_type(element) for element in x} unified_type = unify_types_limited(*ts) if not unified_type: raise ExpressionException( "Hail does not support heterogeneous arrays: " "found list with elements of types {} ".format(list(ts))) return tarray(unified_type) elif isinstance(x, set): if len(x) == 0: raise ExpressionException( "Cannot impute type of empty set. Use 'hl.empty_set' to create an empty set." ) ts = {impute_type(element) for element in x} unified_type = unify_types_limited(*ts) if not unified_type: raise ExpressionException( "Hail does not support heterogeneous sets: " "found set with elements of types {} ".format(list(ts))) return tset(unified_type) elif isinstance(x, dict): if len(x) == 0: raise ExpressionException( "Cannot impute type of empty dict. Use 'hl.empty_dict' to create an empty dict." ) kts = {impute_type(element) for element in x.keys()} vts = {impute_type(element) for element in x.values()} unified_key_type = unify_types_limited(*kts) unified_value_type = unify_types_limited(*vts) if not unified_key_type: raise ExpressionException( "Hail does not support heterogeneous dicts: " "found dict with keys of types {} ".format(list(kts))) if not unified_value_type: raise ExpressionException( "Hail does not support heterogeneous dicts: " "found dict with values of types {} ".format(list(vts))) return tdict(unified_key_type, unified_value_type) elif isinstance(x, np.ndarray): element_type = from_numpy(x.dtype) return tndarray(element_type, x.ndim) elif x is None: raise ExpressionException("Hail cannot impute the type of 'None'") elif isinstance( x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)): raise ExpressionException( "'switch' and 'case' expressions must end with a call to either" "'default' or 'or_missing'") else: raise ExpressionException( "Hail cannot automatically impute type of {}: {}".format( type(x), x))