Ejemplo n.º 1
0
def analyze(caller: str,
            expr: Expression,
            expected_indices: Indices,
            aggregation_axes: Set = set()):
    """Check that ``expr`` is in scope for ``caller``; raise if it is not.

    The following problems are collected, in this order:

    * the expression's source is set and is not ``expected_indices.source``;
    * the expression carries axes that are not in ``expected_indices.axes``;
    * the expression contains aggregations and either ``aggregation_axes`` is
      empty (aggregation unsupported), or an aggregation has a source other
      than the expected one, or an aggregation carries axes outside
      ``expected_indices.axes`` united with ``aggregation_axes``.

    Every collected problem is reported through ``error`` and then the first
    one is raised. When nothing is wrong the function returns ``None``.

    Parameters
    ----------
    caller : str
        Name of the calling method; only used to build messages.
    expr : Expression
        Expression to validate; its ``_indices`` and ``_aggregations``
        attributes are read.
    expected_indices : Indices
        Source and axes the caller accepts.
    aggregation_axes : Set
        Axes the caller can aggregate over. The shared default set is only
        read here, never mutated.

    Raises
    ------
    ExpressionException
        The first problem found, if any.
    AssertionError
        If stray axes are detected but no referenced field accounts for them.
    """
    indices = expr._indices
    source = indices.source
    axes = indices.axes
    aggregations = expr._aggregations

    # `warnings` is never populated in this function; the reporting loop at
    # the end is kept so the structure mirrors the error path.
    warnings = []
    errors = []

    expected_source = expected_indices.source
    expected_axes = expected_indices.axes

    # Sources are compared by identity; a source of None is always accepted.
    if source is not None and source is not expected_source:
        errors.append(
            ExpressionException(
                '{} expects an expression from source {}, found expression derived from {}'
                .format(caller, expected_source, source)))

    # check for stray indices by subtracting expected axes from observed
    unexpected_axes = axes - expected_axes

    if unexpected_axes:
        # one or more out-of-scope fields: find the referenced fields whose
        # own axes overlap the stray ones so the message can name them
        refs = get_refs(expr)
        bad_refs = []
        for name, inds in refs.items():
            bad_axes = inds.axes.intersection(unexpected_axes)
            if bad_axes:
                bad_refs.append((name, inds))

        assert len(bad_refs) > 0
        errors.append(
            ExpressionException(
                "scope violation: '{caller}' expects an expression indexed by {expected}"
                "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}"
                .format(
                    caller=caller,
                    expected=list(expected_axes),
                    axes=list(indices.axes),
                    stray=list(unexpected_axes),
                    fields=''.join("\n        '{}' (indices {})".format(
                        name, list(inds.axes)) for name, inds in bad_refs),
                    # the aggregation hint is only added when every stray
                    # axis is one the caller can aggregate over
                    agg='' if (unexpected_axes - aggregation_axes) else
                    "\n    '{}' supports aggregation over axes {}, "
                    "so these fields may appear inside an aggregator function."
                    .format(caller, list(aggregation_axes)))))

    if aggregations:
        if aggregation_axes:

            # the expected axes of aggregated expressions are the expected axes + axes aggregated over
            expected_agg_axes = expected_axes.union(aggregation_axes)

            for agg in aggregations:
                assert isinstance(agg, Aggregation)
                refs = get_refs(*agg.exprs)
                agg_indices = agg.indices
                agg_axes = agg_indices.axes
                if agg_indices.source is not None and agg_indices.source is not expected_source:
                    # Report the aggregation's own source: it is the value
                    # the guard above tested. The outer expression's source
                    # may be None or may even match the expected source.
                    errors.append(
                        ExpressionException(
                            'Expected an expression from source {}, found expression derived from {}'
                            '\n    Invalid fields: [{}]'.format(
                                expected_source, agg_indices.source,
                                ', '.join("'{}'".format(name)
                                          for name in refs))))

                # check for stray indices
                unexpected_agg_axes = agg_axes - expected_agg_axes
                if unexpected_agg_axes:
                    # one or more out-of-scope fields
                    bad_refs = []
                    for name, inds in refs.items():
                        bad_axes = inds.axes.intersection(unexpected_agg_axes)
                        if bad_axes:
                            bad_refs.append((name, inds))

                    assert len(bad_refs) > 0

                    errors.append(
                        ExpressionException(
                            "scope violation: '{caller}' supports aggregation over indices {expected}"
                            "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}"
                            .format(caller=caller,
                                    expected=list(aggregation_axes),
                                    axes=list(agg_axes),
                                    stray=list(unexpected_agg_axes),
                                    fields=''.join(
                                        "\n        '{}' (indices {})".format(
                                            name, list(inds.axes))
                                        for name, inds in bad_refs))))
        else:
            errors.append(
                ExpressionException(
                    "'{}' does not support aggregation".format(caller)))

    for w in warnings:
        warn('{}'.format(w.msg))
    if errors:
        # log every problem, but only the first one can be raised
        for e in errors:
            error('{}'.format(e.msg))
        raise errors[0]
Ejemplo n.º 2
0
def analyze(caller: str,
            expr: Expression,
            expected_indices: Indices,
            aggregation_axes: Set = set(),
            broadcast=True):
    """Validate that ``expr`` may be used by ``caller``; raise otherwise.

    Problems are gathered in this order: a source mismatch, out-of-scope axes
    on the expression itself, and out-of-scope axes (or unsupported use) of
    any aggregations inside the expression. Each problem is passed to
    ``error`` and the first one is then raised; with no problems the function
    returns ``None``.

    Parameters
    ----------
    caller : str
        Name of the calling method, used only in messages.
    expr : Expression
        Expression under test; ``_indices`` and ``_aggregations`` are read.
    expected_indices : Indices
        Source and axes accepted by the caller.
    aggregation_axes : Set
        Axes the caller can aggregate over (read-only here).
    broadcast : bool
        When true, only axes outside ``expected_indices.axes`` are stray.
        When false, the expression's axes are compared for equality with the
        expected axes, and on a difference all of the expression's axes are
        reported (so an expression with no axes has nothing to report).

    Raises
    ------
    ExpressionException
        The first problem found, if any.
    AssertionError
        If stray axes exist but no referenced field can be blamed for them.
    """
    found = expr._indices
    found_source = found.source
    found_axes = found.axes
    aggs = expr._aggregations

    warnings = []  # never filled in this function
    errors = []

    want_source = expected_indices.source
    want_axes = expected_indices.axes

    # --- source check (identity comparison; a None source always passes) ---
    if not (found_source is None or found_source is want_source):
        foreign = [name
                   for name, inds in get_refs(expr).items()
                   if inds.source is not want_source]
        mismatch_msg = (
            "'{caller}': source mismatch\n"
            "  Expected an expression from source {expected}\n"
            "  Found expression derived from source {actual}\n"
            "  Problematic field(s): {bad_refs}\n\n"
            "  This error is commonly caused by chaining methods together:\n"
            "    >>> ht.distinct().select(ht.x)\n\n"
            "  Correct usage:\n"
            "    >>> ht = ht.distinct()\n"
            "    >>> ht = ht.select(ht.x)"
        ).format(caller=caller,
                 expected=want_source,
                 actual=found_source,
                 bad_refs=foreign)
        errors.append(ExpressionException(mismatch_msg))

    # --- axis check on the expression itself ---
    if broadcast:
        strictness = ''
        stray = found_axes - want_axes
    else:
        strictness = 'strictly '
        if found_axes != want_axes:
            stray = found_axes
        else:
            stray = set()

    if stray:
        refs = get_refs(expr)
        if broadcast:
            # blame fields whose own axes overlap the stray ones
            offenders = [(name, inds) for name, inds in refs.items()
                         if inds.axes.intersection(stray)]
        else:
            # strict mode: blame every field not indexed exactly as expected
            offenders = [(name, inds) for name, inds in refs.items()
                         if inds.axes != want_axes]

        assert offenders

        # the aggregation hint is shown only when every stray axis is one
        # the caller could aggregate over
        if stray - aggregation_axes:
            agg_hint = ''
        else:
            agg_hint = (
                "\n    '{}' supports aggregation over axes {}, "
                "so these fields may appear inside an aggregator function."
            ).format(caller, list(aggregation_axes))

        scope_msg = (
            "scope violation: '{caller}' expects an expression {strictness}indexed by {expected}"
            "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}"
        ).format(
            caller=caller,
            strictness=strictness,
            expected=list(want_axes),
            axes=list(found.axes),
            stray=list(stray),
            fields=''.join(
                "\n        '{}' (indices {})".format(name, list(inds.axes))
                for name, inds in offenders),
            agg=agg_hint)
        errors.append(ExpressionException(scope_msg))

    # --- aggregation checks ---
    if aggs:
        if not aggregation_axes:
            errors.append(
                ExpressionException(
                    "'{}' does not support aggregation".format(caller)))
        else:
            # inside an aggregation, the aggregated-over axes are in scope too
            allowed_agg_axes = want_axes.union(aggregation_axes)

            for agg in aggs:
                assert isinstance(agg, Aggregation)
                agg_refs = get_refs(*agg.exprs)
                agg_axes = agg.indices.axes

                stray_agg = agg_axes - allowed_agg_axes
                if not stray_agg:
                    continue

                agg_offenders = [(name, inds)
                                 for name, inds in agg_refs.items()
                                 if inds.axes.intersection(stray_agg)]
                assert agg_offenders

                agg_msg = (
                    "scope violation: '{caller}' supports aggregation over indices {expected}"
                    "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}"
                ).format(
                    caller=caller,
                    expected=list(aggregation_axes),
                    axes=list(agg_axes),
                    stray=list(stray_agg),
                    fields=''.join(
                        "\n        '{}' (indices {})".format(name, list(inds.axes))
                        for name, inds in agg_offenders))
                errors.append(ExpressionException(agg_msg))

    # --- reporting ---
    for w in warnings:
        warn('{}'.format(w.msg))
    if not errors:
        return
    for e in errors:
        error('{}'.format(e.msg))
    raise errors[0]
Ejemplo n.º 3
0
def analyze(caller: str,
            expr: Expression,
            expected_indices: Indices,
            aggregation_axes: Set = set(),
            broadcast=True):
    """Ensure ``expr`` is in scope for ``caller``, raising if it is not.

    Checks, in order: that the expression's source (when set) is the expected
    source; that the expression's axes fit the expected axes; and that any
    aggregations in the expression are supported and stay within the expected
    axes plus ``aggregation_axes``. All problems found are sent to ``error``;
    the first is raised. Returns ``None`` when there is nothing to report.

    Parameters
    ----------
    caller : str
        Calling method's name; appears only in messages.
    expr : Expression
        Expression being checked; reads ``_indices`` and ``_aggregations``.
    expected_indices : Indices
        The source and axes the caller accepts.
    aggregation_axes : Set
        Axes the caller can aggregate over; only read, never modified.
    broadcast : bool
        True: axes outside the expected ones are stray. False: the axes must
        equal the expected axes; if they differ, all of the expression's axes
        are reported as unexpected (nothing is reported when it has none).

    Raises
    ------
    ExpressionException
        The first problem found, if any.
    AssertionError
        When stray axes are found but no referenced field explains them.
    """

    def _render_fields(pairs):
        # one indented line per offending field, naming its axes
        return ''.join("\n        '{}' (indices {})".format(field, list(field_inds.axes))
                       for field, field_inds in pairs)

    def _overlapping(ref_map, stray_set):
        # fields whose own axes intersect the stray axes
        hits = []
        for field, field_inds in ref_map.items():
            if field_inds.axes.intersection(stray_set):
                hits.append((field, field_inds))
        return hits

    indices = expr._indices
    actual_source = indices.source
    actual_axes = indices.axes
    aggregations = expr._aggregations

    warnings = []  # stays empty; kept for the reporting loop below
    errors = []

    expected_source = expected_indices.source
    expected_axes = expected_indices.axes

    # Source comparison is by identity, and a None source is always accepted.
    source_ok = actual_source is None or actual_source is expected_source
    if not source_ok:
        wrong_source_fields = []
        for field, field_inds in get_refs(expr).items():
            if field_inds.source is not expected_source:
                wrong_source_fields.append(field)
        template = ("'{caller}': source mismatch\n"
                    "  Expected an expression from source {expected}\n"
                    "  Found expression derived from source {actual}\n"
                    "  Problematic field(s): {bad_refs}\n\n"
                    "  This error is commonly caused by chaining methods together:\n"
                    "    >>> ht.distinct().select(ht.x)\n\n"
                    "  Correct usage:\n"
                    "    >>> ht = ht.distinct()\n"
                    "    >>> ht = ht.select(ht.x)")
        errors.append(ExpressionException(template.format(
            caller=caller,
            expected=expected_source,
            actual=actual_source,
            bad_refs=list(wrong_source_fields))))

    # Work out which axes are out of scope, leniently or strictly.
    if broadcast:
        unexpected = actual_axes - expected_axes
        strictness = ''
    else:
        unexpected = set() if not (actual_axes != expected_axes) else actual_axes
        strictness = 'strictly '

    if unexpected:
        refs = get_refs(expr)
        if broadcast:
            culprits = _overlapping(refs, unexpected)
        else:
            # strict mode: any field not indexed exactly as expected is at fault
            culprits = []
            for field, field_inds in refs.items():
                if field_inds.axes != expected_axes:
                    culprits.append((field, field_inds))

        assert len(culprits) > 0

        # Hint at aggregation only when all stray axes could be aggregated away.
        not_aggregable = unexpected - aggregation_axes
        if not_aggregable:
            hint = ''
        else:
            hint = ("\n    '{}' supports aggregation over axes {}, "
                    "so these fields may appear inside an aggregator function."
                    .format(caller, list(aggregation_axes)))

        template = ("scope violation: '{caller}' expects an expression {strictness}indexed by {expected}"
                    "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}{agg}")
        errors.append(ExpressionException(template.format(
            caller=caller,
            strictness=strictness,
            expected=list(expected_axes),
            axes=list(indices.axes),
            stray=list(unexpected),
            fields=_render_fields(culprits),
            agg=hint)))

    if aggregations:
        if aggregation_axes:
            # the expected axes of aggregated expressions are the expected axes + axes aggregated over
            in_scope_for_aggs = expected_axes.union(aggregation_axes)
            template = ("scope violation: '{caller}' supports aggregation over indices {expected}"
                        "\n    Found indices {axes}, with unexpected indices {stray}. Invalid fields:{fields}")

            for agg in aggregations:
                assert isinstance(agg, Aggregation)
                agg_refs = get_refs(*agg.exprs)
                agg_axes = agg.indices.axes

                unexpected_in_agg = agg_axes - in_scope_for_aggs
                if unexpected_in_agg:
                    culprits = _overlapping(agg_refs, unexpected_in_agg)

                    assert len(culprits) > 0

                    errors.append(ExpressionException(template.format(
                        caller=caller,
                        expected=list(aggregation_axes),
                        axes=list(agg_axes),
                        stray=list(unexpected_in_agg),
                        fields=_render_fields(culprits))))
        else:
            errors.append(ExpressionException("'{}' does not support aggregation".format(caller)))

    for w in warnings:
        warn('{}'.format(w.msg))
    if errors:
        # every problem is logged; only the first can be raised
        for problem in errors:
            error('{}'.format(problem.msg))
        raise errors[0]