def analyze(caller: str, expr: Expression, expected_indices: Indices, aggregation_axes: Set = set()):
    """Check that ``expr`` has the source and axes that ``caller`` expects.

    Every problem found is collected as an ``ExpressionException``. If any
    were collected, each message is passed to ``error`` and the first
    exception is raised. The function returns ``None`` when nothing is wrong.

    Parameters
    ----------
    caller : str
        Name inserted into the error messages.
    expr : Expression
        Expression to check; its ``_indices`` and ``_aggregations``
        attributes are read.
    expected_indices : Indices
        Supplies the expected ``source`` and ``axes``.
    aggregation_axes : Set
        Extra axes allowed inside aggregations. When empty, any aggregation
        found in ``expr`` is reported as an error. The shared default set is
        only read here, never mutated.

    Raises
    ------
    ExpressionException
        The first problem collected (source mismatch, out-of-scope axes, or
        an unsupported / out-of-scope aggregation).
    """
    indices = expr._indices
    source = indices.source
    axes = indices.axes
    aggregations = expr._aggregations

    warnings = []
    errors = []

    expected_source = expected_indices.source
    expected_axes = expected_indices.axes

    # A source of None is accepted for any expected source.
    if source is not None and source is not expected_source:
        errors.append(
            ExpressionException(
                '{} expects an expression from source {}, found expression derived from {}'
                .format(caller, expected_source, source)))

    # check for stray indices by subtracting expected axes from observed
    unexpected_axes = axes - expected_axes

    if unexpected_axes:
        # one or more out-of-scope fields
        refs = get_refs(expr)
        bad_refs = [(name, inds) for name, inds in refs.items()
                    if inds.axes.intersection(unexpected_axes)]
        assert len(bad_refs) > 0

        # Only hint at aggregation when every stray axis is one the caller
        # can aggregate over.
        if unexpected_axes - aggregation_axes:
            agg_hint = ''
        else:
            agg_hint = ("\n '{}' supports aggregation over axes {}, "
                        "so these fields may appear inside an aggregator function."
                        .format(caller, list(aggregation_axes)))

        errors.append(
            ExpressionException(
                "scope violation: '{caller}' expects an expression indexed by {expected}"
                "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}"
                .format(
                    caller=caller,
                    expected=list(expected_axes),
                    axes=list(indices.axes),
                    stray=list(unexpected_axes),
                    fields=''.join("\n '{}' (indices {})".format(name, list(inds.axes))
                                   for name, inds in bad_refs),
                    agg=agg_hint)))

    if aggregations:
        if aggregation_axes:
            # the expected axes of aggregated expressions are the expected axes + axes aggregated over
            expected_agg_axes = expected_axes.union(aggregation_axes)

            for agg in aggregations:
                assert isinstance(agg, Aggregation)
                refs = get_refs(*agg.exprs)
                agg_indices = agg.indices
                agg_axes = agg_indices.axes

                if agg_indices.source is not None and agg_indices.source is not expected_source:
                    # Report the aggregation's own source: that is the value
                    # the condition above tested, and it can differ from the
                    # enclosing expression's source.
                    errors.append(
                        ExpressionException(
                            'Expected an expression from source {}, found expression derived from {}'
                            '\n Invalid fields: [{}]'.format(
                                expected_source,
                                agg_indices.source,
                                ', '.join("'{}'".format(name) for name in refs))))

                # check for stray indices
                unexpected_agg_axes = agg_axes - expected_agg_axes
                if unexpected_agg_axes:
                    # one or more out-of-scope fields
                    bad_refs = [(name, inds) for name, inds in refs.items()
                                if inds.axes.intersection(unexpected_agg_axes)]
                    assert len(bad_refs) > 0

                    errors.append(
                        ExpressionException(
                            "scope violation: '{caller}' supports aggregation over indices {expected}"
                            "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}"
                            .format(
                                caller=caller,
                                expected=list(aggregation_axes),
                                axes=list(agg_axes),
                                stray=list(unexpected_agg_axes),
                                fields=''.join("\n '{}' (indices {})".format(name, list(inds.axes))
                                               for name, inds in bad_refs))))
        else:
            errors.append(
                ExpressionException(
                    "'{}' does not support aggregation".format(caller)))

    for w in warnings:
        warn('{}'.format(w.msg))
    if errors:
        # Report every problem, then raise the first one collected.
        for e in errors:
            error('{}'.format(e.msg))
        raise errors[0]
def analyze(caller: str, expr: Expression, expected_indices: Indices, aggregation_axes: Set = set(), broadcast=True):
    """Validate the source and axes of ``expr`` against what ``caller`` expects.

    Problems are gathered as ``ExpressionException`` objects. When at least
    one was gathered, every message is passed to ``error`` and the first
    exception is raised; otherwise the function returns ``None``.

    Parameters
    ----------
    caller : str
        Name inserted into the error messages.
    expr : Expression
        Expression to check; ``_indices`` and ``_aggregations`` are read.
    expected_indices : Indices
        Supplies the expected ``source`` and ``axes``.
    aggregation_axes : Set
        Extra axes allowed inside aggregations; when empty, any aggregation
        in ``expr`` is reported as an error.
    broadcast : bool
        If true, only axes outside the expected set are stray. If false, the
        axes must equal the expected set, although an expression with no
        axes at all still passes because its stray set is empty.

    Raises
    ------
    ExpressionException
        The first problem gathered.
    """
    observed = expr._indices
    source = observed.source
    axes = observed.axes
    aggregations = expr._aggregations

    expected_source = expected_indices.source
    expected_axes = expected_indices.axes

    notices = []
    problems = []

    if source is not None and source is not expected_source:
        foreign = [name for name, inds in get_refs(expr).items()
                   if inds.source is not expected_source]
        mismatch_template = (
            "'{caller}': source mismatch\n"
            " Expected an expression from source {expected}\n"
            " Found expression derived from source {actual}\n"
            " Problematic field(s): {bad_refs}\n\n"
            " This error is commonly caused by chaining methods together:\n"
            " >>> ht.distinct().select(ht.x)\n\n"
            " Correct usage:\n"
            " >>> ht = ht.distinct()\n"
            " >>> ht = ht.select(ht.x)")
        problems.append(ExpressionException(mismatch_template.format(
            caller=caller,
            expected=expected_source,
            actual=source,
            bad_refs=list(foreign))))

    # Stray axes: anything beyond the expected set when broadcasting is
    # allowed, otherwise the whole observed set whenever it is not an exact
    # match.
    if broadcast:
        strictness = ''
        stray_axes = axes - expected_axes
    else:
        strictness = 'strictly '
        stray_axes = axes if axes != expected_axes else set()

    if stray_axes:
        # At least one referenced field is out of scope; find which ones.
        refs = get_refs(expr)
        if broadcast:
            offenders = [(name, inds) for name, inds in refs.items()
                         if inds.axes.intersection(stray_axes)]
        else:
            offenders = [(name, inds) for name, inds in refs.items()
                         if inds.axes != expected_axes]
        assert len(offenders) > 0

        field_lines = ''.join(
            "\n '{}' (indices {})".format(name, list(inds.axes))
            for name, inds in offenders)

        # The aggregation hint is only shown when every stray axis is one
        # the caller can aggregate over.
        if stray_axes - aggregation_axes:
            agg_hint = ''
        else:
            agg_hint = ("\n '{}' supports aggregation over axes {}, "
                        "so these fields may appear inside an aggregator function."
                        .format(caller, list(aggregation_axes)))

        scope_template = (
            "scope violation: '{caller}' expects an expression {strictness}indexed by {expected}"
            "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}")
        problems.append(ExpressionException(scope_template.format(
            caller=caller,
            strictness=strictness,
            expected=list(expected_axes),
            axes=list(observed.axes),
            stray=list(stray_axes),
            fields=field_lines,
            agg=agg_hint)))

    if aggregations:
        if aggregation_axes:
            # Inside an aggregation the allowed axes are the expected axes
            # plus the axes being aggregated over.
            allowed_agg_axes = expected_axes.union(aggregation_axes)
            agg_template = (
                "scope violation: '{caller}' supports aggregation over indices {expected}"
                "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}")

            for agg in aggregations:
                assert isinstance(agg, Aggregation)
                refs = get_refs(*agg.exprs)
                agg_axes = agg.indices.axes

                stray_agg_axes = agg_axes - allowed_agg_axes
                if stray_agg_axes:
                    offenders = [(name, inds) for name, inds in refs.items()
                                 if inds.axes.intersection(stray_agg_axes)]
                    assert len(offenders) > 0

                    problems.append(ExpressionException(agg_template.format(
                        caller=caller,
                        expected=list(aggregation_axes),
                        axes=list(agg_axes),
                        stray=list(stray_agg_axes),
                        fields=''.join(
                            "\n '{}' (indices {})".format(name, list(inds.axes))
                            for name, inds in offenders))))
        else:
            problems.append(
                ExpressionException("'{}' does not support aggregation".format(caller)))

    for notice in notices:
        warn('{}'.format(notice.msg))
    if problems:
        for problem in problems:
            error('{}'.format(problem.msg))
        raise problems[0]
def analyze(caller: str, expr: Expression, expected_indices: Indices, aggregation_axes: Set = set(), broadcast=True):
    """Verify that ``expr`` is in scope for ``caller``.

    Three checks run in order: the expression's source, its axes, and the
    axes of each aggregation it contains. Each failure is recorded as an
    ``ExpressionException``. If any were recorded, all messages are passed to
    ``error`` and the first exception is raised. Returns ``None`` otherwise.

    Parameters
    ----------
    caller : str
        Name inserted into the error messages.
    expr : Expression
        Expression to check; ``_indices`` and ``_aggregations`` are read.
    expected_indices : Indices
        Supplies the expected ``source`` and ``axes``.
    aggregation_axes : Set
        Extra axes allowed inside aggregations; when empty, any aggregation
        in ``expr`` is reported as an error.
    broadcast : bool
        When true, axes outside the expected set are the stray ones. When
        false, any non-empty axis set that differs from the expected set is
        stray in full.

    Raises
    ------
    ExpressionException
        The first failure recorded.
    """

    def describe_fields(pairs):
        # One indented line per offending field, listing that field's axes.
        return ''.join("\n '{}' (indices {})".format(field, list(field_inds.axes))
                       for field, field_inds in pairs)

    indices = expr._indices
    source = indices.source
    axes = indices.axes
    aggregations = expr._aggregations
    expected_source = expected_indices.source
    expected_axes = expected_indices.axes

    warnings = []
    errors = []

    # --- source check -----------------------------------------------------
    source_ok = source is None or source is expected_source
    if not source_ok:
        wrong_source_fields = []
        for field, field_inds in get_refs(expr).items():
            if field_inds.source is expected_source:
                continue
            wrong_source_fields.append(field)
        errors.append(ExpressionException(
            "'{caller}': source mismatch\n"
            " Expected an expression from source {expected}\n"
            " Found expression derived from source {actual}\n"
            " Problematic field(s): {bad_refs}\n\n"
            " This error is commonly caused by chaining methods together:\n"
            " >>> ht.distinct().select(ht.x)\n\n"
            " Correct usage:\n"
            " >>> ht = ht.distinct()\n"
            " >>> ht = ht.select(ht.x)".format(
                caller=caller,
                expected=expected_source,
                actual=source,
                bad_refs=list(wrong_source_fields))))

    # --- axes check -------------------------------------------------------
    if not broadcast:
        unexpected_axes = set() if axes == expected_axes else axes
        strictness = 'strictly '
    else:
        unexpected_axes = axes - expected_axes
        strictness = ''

    if unexpected_axes:
        out_of_scope = []
        for field, field_inds in get_refs(expr).items():
            if broadcast:
                is_bad = bool(field_inds.axes.intersection(unexpected_axes))
            else:
                is_bad = field_inds.axes != expected_axes
            if is_bad:
                out_of_scope.append((field, field_inds))
        assert len(out_of_scope) > 0

        only_aggregable = not (unexpected_axes - aggregation_axes)
        errors.append(ExpressionException(
            "scope violation: '{caller}' expects an expression {strictness}indexed by {expected}"
            "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}".format(
                caller=caller,
                strictness=strictness,
                expected=list(expected_axes),
                axes=list(indices.axes),
                stray=list(unexpected_axes),
                fields=describe_fields(out_of_scope),
                agg=("\n '{}' supports aggregation over axes {}, "
                     "so these fields may appear inside an aggregator function.".format(
                         caller, list(aggregation_axes))) if only_aggregable else '')))

    # --- aggregation check ------------------------------------------------
    if aggregations and not aggregation_axes:
        errors.append(ExpressionException("'{}' does not support aggregation".format(caller)))
    elif aggregations:
        # Aggregated expressions may use the expected axes plus the axes
        # aggregated over.
        expected_agg_axes = expected_axes.union(aggregation_axes)
        for agg in aggregations:
            assert isinstance(agg, Aggregation)
            agg_refs = get_refs(*agg.exprs)
            agg_axes = agg.indices.axes

            unexpected_agg_axes = agg_axes - expected_agg_axes
            if not unexpected_agg_axes:
                continue

            out_of_scope = []
            for field, field_inds in agg_refs.items():
                if field_inds.axes.intersection(unexpected_agg_axes):
                    out_of_scope.append((field, field_inds))
            assert len(out_of_scope) > 0

            errors.append(ExpressionException(
                "scope violation: '{caller}' supports aggregation over indices {expected}"
                "\n Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}".format(
                    caller=caller,
                    expected=list(aggregation_axes),
                    axes=list(agg_axes),
                    stray=list(unexpected_agg_axes),
                    fields=describe_fields(out_of_scope))))

    for w in warnings:
        warn('{}'.format(w.msg))
    if not errors:
        return
    for e in errors:
        error('{}'.format(e.msg))
    raise errors[0]