Beispiel #1
0
def generate_optflow_fsl_token_when_two_vars(token, columns, result_value_name,
                                             group_by, non_aggregation_index,
                                             is_aggregation_part):
    """
    Generate the token which is accepted by the OptFlow FSL expression
    when the variable number is 2.

    The token is first offered to the aggregation-function converter and
    then to the comparison-token converter; the first truthy conversion
    result is returned as is. Otherwise the token is rewritten depending
    on whether it sits inside the aggregation part of the expression.

    Args:
        token (str): the string token.
        columns (list[str]): the column names of the source table.
        result_value_name (str): the result value name to be optimized.
        group_by (str): the column name to be grouped. Only its
            truthiness is checked here.
        non_aggregation_index (str): the index string inside the non
            aggregation part of the result expression. It is inserted
            verbatim between the square brackets of the generated token.
        is_aggregation_part (bool): whether the token is inside the
            aggregation part of the result expression.

    Returns:
        A token which OptFlow FSL expression accepts.

    Raises:
        ValueError: if the token is outside the aggregation part and is
            either the result value name, or a column name while
            ``group_by`` is empty.
    """
    # Call each converter once and reuse its result instead of
    # converting the same token a second time just to return it.
    aggregation_token = try_convert_to_aggregation_function(token)
    if aggregation_token:
        return aggregation_token

    comparison_token = try_convert_comparision_token(token)
    if comparison_token:
        return comparison_token

    if is_aggregation_part:
        # Inside the aggregation both indices i and j are bound.
        if token == result_value_name:
            return '@X[i,j]'

        if token in columns:
            return '@input["%s"][i,j]' % token

        return token

    # Outside the aggregation the result value must not be referenced.
    if token == result_value_name:
        raise ValueError("result value name %s should not appear "
                         "in non aggregation expression" % token)

    if token in columns:
        # A column can only be indexed here when a GROUP BY column exists.
        if not group_by:
            raise ValueError(
                "column %s should not appear in non aggregation expression"
                % token)

        return '@input["%s"][%s]' % (token, non_aggregation_index)

    return token
Beispiel #2
0
def generate_optflow_fsl_expr_when_two_vars(columns,
                                            tokens,
                                            variables,
                                            result_value_name,
                                            group_by=None):
    """
    Build the OptFlow FSL expression for a problem with exactly 2 variables.

    Every aggregation call found in the token list is rewritten to the form
    ``([<body> <inner range>])``; all other tokens are converted one by one
    by ``generate_optflow_fsl_token_when_two_vars``. When ``group_by`` equals
    one of the two variables, the expression is prefixed with an outer range
    over that variable's index.

    Args:
        columns (list[str]): the column names of the source table.
        tokens (list[str]): the objective or constraint string token list.
        variables (list[str]): the variable names to be optimized.
        result_value_name (str): the result value name to be optimized.
        group_by (str): the column name to be grouped.

    Returns:
        An OptFlow FSL expression.

    Raises:
        AssertionError: if ``variables`` does not hold exactly 2 items, if
            no token is an aggregation function, or if the bracket finder
            reports more than one left bracket (nested aggregation).
        ValueError: if ``group_by`` is set but is not one of ``variables``.
    """
    assert len(variables) == 2
    assert any(try_convert_to_aggregation_function(tok) for tok in tokens), \
        "OptFlow only supports the aggregation " \
        "expression when there are 2 variables"

    tokens, variables, result_value_name, group_by = update_by_column_names(
        columns=columns,
        tokens=tokens,
        variables=variables,
        result_value_name=result_value_name,
        group_by=group_by)

    assert_are_valid_tokens(columns=columns,
                            tokens=tokens,
                            result_value_name=result_value_name,
                            group_by=group_by)

    if group_by and group_by not in variables:
        raise ValueError("GROUP BY column %s should be inside variables" %
                         group_by)

    # Choose which index is iterated outside and which inside the
    # aggregation, depending on the variable that is grouped.
    if group_by == variables[0]:
        outer_range, inner_range, non_aggregation_index = (
            "for i in @I", "for j in @J", "i,@J[0]")
    elif group_by == variables[1]:
        outer_range, inner_range, non_aggregation_index = (
            "for j in @J", "for i in @I", "@I[0],j")
    else:
        outer_range, inner_range, non_aggregation_index = (
            None, "for i in @I for j in @J", None)

    def convert(raw_token, inside_aggregation):
        # Bind the arguments that are the same for every token.
        return generate_optflow_fsl_token_when_two_vars(
            token=raw_token,
            columns=columns,
            result_value_name=result_value_name,
            group_by=group_by,
            non_aggregation_index=non_aggregation_index,
            is_aggregation_part=inside_aggregation)

    pieces = []
    cursor = 0
    while cursor < len(tokens):
        lefts, rights, resume_at = \
            find_matched_aggregation_function_brackets(tokens, cursor)
        assert len(lefts) <= 1, \
            "OptFlow does not support nested aggregation calls"
        # Without a bracket pair both positions collapse to resume_at.
        open_at = lefts[0] if lefts else resume_at
        close_at = rights[0] if rights else resume_at

        # Tokens in front of the opening bracket.
        for pos in range(cursor, open_at):
            pieces.append(convert(tokens[pos], False))
        cursor = max(cursor, open_at)

        if open_at == close_at:
            # No aggregation call in this segment.
            continue

        # The bracketed aggregation body, brackets included.
        for pos in range(cursor, close_at + 1):
            if pos == open_at:
                pieces.extend(['(', '['])
            elif pos == close_at:
                pieces.extend([' ', inner_range, ']', ')'])
            else:
                pieces.append(convert(tokens[pos], True))
        cursor = max(cursor, close_at + 1)

        # Tokens after the closing bracket, up to the next segment.
        for pos in range(cursor, resume_at):
            pieces.append(convert(tokens[pos], False))
        cursor = max(cursor, resume_at)

    expression = "".join(pieces)
    if not outer_range:
        return expression
    return "%s: %s" % (outer_range, expression)