コード例 #1
0
ファイル: sort.py プロジェクト: stjordanis/hpat
def sort_distributed_analysis(sort_node, array_dists):
    """Reconcile the distributions of a sort node's input and output arrays.

    Every input array (keys plus data columns) ends up with one common
    distribution and every output array with another. Both are written back
    into ``array_dists`` in place, keyed by variable name.

    Args:
        sort_node: node exposing ``key_arrs`` / ``out_key_arrs`` (lists of
            variables with a ``name``) and ``df_in_vars`` / ``df_out_vars``
            (mappings whose values are such variables).
        array_dists: mapping from variable name to ``Distribution``. Every
            input needs an entry; outputs may not have one yet.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    sources = sort_node.key_arrs + list(sort_node.df_in_vars.values())
    results = sort_node.out_key_arrs + list(sort_node.df_out_vars.values())

    # inputs share one distribution: the smallest enum value among them,
    # never above OneD
    in_dist = Distribution(min(
        [Distribution.OneD.value]
        + [array_dists[var.name].value for var in sources]))

    # the shuffle makes the output 1D_Var at best; it also has to meet the
    # input distribution and whatever is already recorded for any output
    # TODO: set to input dist in inplace case
    recorded_out = [array_dists[var.name].value
                    for var in results if var.name in array_dists]
    out_dist = Distribution(min(
        [in_dist.value, Distribution.OneD_Var.value] + recorded_out))

    # an output that is not 1D_Var (e.g. REP) forces the same on the inputs
    if out_dist != Distribution.OneD_Var:
        in_dist = out_dist

    for var in sources:
        array_dists[var.name] = in_dist
    for var in results:
        array_dists[var.name] = out_dist

    # TODO: handle rebalance (OneD input with OneD_Var output)
コード例 #2
0
def filter_distributed_analysis(filter_node, array_dists):
    """Reconcile the distributions of a filter node's inputs and outputs.

    The input columns and the boolean array get one common distribution, and
    the output columns get another that is never above ``OneD_Var`` or the
    input distribution. ``array_dists`` is updated in place.

    Args:
        filter_node: node exposing ``df_in_vars`` / ``df_out_vars`` (mappings
            whose values are variables with a ``name``) and ``bool_arr``.
        array_dists: mapping from variable name to ``Distribution``. Inputs
            and the boolean array need an entry; outputs may not have one.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    # input columns followed by the boolean array
    source_names = [var.name for var in filter_node.df_in_vars.values()]
    source_names.append(filter_node.bool_arr.name)

    # all of them share the smallest distribution value, never above OneD
    in_dist = Distribution(min(
        [Distribution.OneD.value]
        + [array_dists[name].value for name in source_names]))
    for name in source_names:
        array_dists[name] = in_dist

    # outputs: at most 1D_Var, at most the input distribution (e.g. REP in
    # causes REP out), and at most anything already recorded for an output
    result_names = [var.name for var in filter_node.df_out_vars.values()]
    recorded_out = [array_dists[name].value
                    for name in result_names if name in array_dists]
    out_dist = Distribution(min(
        [Distribution.OneD_Var.value, in_dist.value] + recorded_out))
    for name in result_names:
        array_dists[name] = out_dist

    # an output that is not 1D_Var (e.g. REP) forces the same on the inputs
    if out_dist != Distribution.OneD_Var:
        for name in source_names:
            array_dists[name] = out_dist
コード例 #3
0
ファイル: hiframes_join.py プロジェクト: gitter-badger/hpat
def join_distributed_analysis(join_node, array_dists):
    """Reconcile the distributions of a join node's inputs and outputs.

    All left and right input columns are given one shared distribution, and
    all output columns another that is never above ``OneD_Var`` or the input
    distribution. ``array_dists`` is updated in place.

    Args:
        join_node: node exposing ``left_vars``, ``right_vars`` and
            ``df_out_vars`` (mappings whose values are variables with a
            ``name``).
        array_dists: mapping from variable name to ``Distribution``. Every
            input needs an entry; outputs may not have one yet.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    # left columns first, then right columns
    source_names = [var.name for var in join_node.left_vars.values()]
    source_names += [var.name for var in join_node.right_vars.values()]
    result_names = [var.name for var in join_node.df_out_vars.values()]

    # inputs share the smallest distribution value, never above OneD
    in_dist = Distribution(min(
        [Distribution.OneD.value]
        + [array_dists[name].value for name in source_names]))

    # outputs: at most 1D_Var, at most the input distribution (e.g. REP in
    # causes REP out), and at most anything already recorded for an output
    recorded_out = [array_dists[name].value
                    for name in result_names if name in array_dists]
    out_dist = Distribution(min(
        [Distribution.OneD_Var.value, in_dist.value] + recorded_out))
    for name in result_names:
        array_dists[name] = out_dist

    # an output that is not 1D_Var (e.g. REP) forces the same on the inputs
    if out_dist != Distribution.OneD_Var:
        in_dist = out_dist

    for name in source_names:
        array_dists[name] = in_dist
コード例 #4
0
def join_distributed_analysis(join_node, array_dists):
    """Reconcile the distributions of a join node's inputs and outputs.

    The left and right tables each get their own common distribution; the
    output columns get one that is never above ``OneD_Var`` or either input
    distribution. ``array_dists`` is updated in place.

    Args:
        join_node: node exposing ``left_vars``, ``right_vars`` and
            ``df_out_vars`` (mappings whose values are variables with a
            ``name``).
        array_dists: mapping from variable name to ``Distribution``. Every
            input needs an entry; outputs may not have one yet.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    left_names = [var.name for var in join_node.left_vars.values()]
    right_names = [var.name for var in join_node.right_vars.values()]
    result_names = [var.name for var in join_node.df_out_vars.values()]
    one_d = Distribution.OneD.value

    # TODO: can columns of the same input table have different dists?
    # left and right inputs can be 1D or 1D_Var separately (q26 case), but
    # the columns within each table share one distribution
    left_dist = Distribution(min(
        [one_d] + [array_dists[name].value for name in left_names]))
    right_dist = Distribution(min(
        [one_d] + [array_dists[name].value for name in right_names]))

    # outputs: at most 1D_Var, at most either input distribution (e.g. REP in
    # causes REP out), and at most anything already recorded for an output
    recorded_out = [array_dists[name].value
                    for name in result_names if name in array_dists]
    out_dist = Distribution(min(
        [Distribution.OneD_Var.value, left_dist.value, right_dist.value]
        + recorded_out))
    for name in result_names:
        array_dists[name] = out_dist

    # an output that is not 1D_Var (e.g. REP) forces the same on both inputs
    if out_dist != Distribution.OneD_Var:
        left_dist = right_dist = out_dist

    for name in left_names:
        array_dists[name] = left_dist
    for name in right_names:
        array_dists[name] = right_dist
コード例 #5
0
def filter_distributed_analysis(filter_node, array_dists):
    """Reconcile the distributions of a filter node's inputs and outputs.

    The input columns get one common distribution. The output columns get
    another that is never above ``OneD_Var`` and never above the input
    distribution. If the output ends up below ``OneD_Var``, that distribution
    is pushed back onto the inputs as well. ``array_dists`` is updated in
    place.

    Args:
        filter_node: node exposing ``df_vars`` (mapping from dataframe key to
            a mapping whose values are variables with a ``name``) together
            with the keys ``df_in`` and ``df_out``.
        array_dists: mapping from variable name to ``Distribution``. Every
            input column needs an entry; outputs may not have one yet.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    df_vars = filter_node.df_vars
    in_cols = list(df_vars[filter_node.df_in].values())
    out_cols = list(df_vars[filter_node.df_out].values())

    # input columns have same distribution
    in_dist = Distribution.OneD
    for col_var in in_cols:
        in_dist = Distribution(
            min(in_dist.value, array_dists[col_var.name].value))
    for col_var in in_cols:
        array_dists[col_var.name] = in_dist

    # output columns have same distribution
    out_dist = Distribution.OneD_Var
    for col_var in out_cols:
        # output dist might not be assigned yet
        if col_var.name in array_dists:
            out_dist = Distribution(
                min(out_dist.value, array_dists[col_var.name].value))

    # out dist should meet input dist (e.g. REP in causes REP out); without
    # this an input below 1D_Var would still yield a 1D_Var output
    out_dist = Distribution(min(out_dist.value, in_dist.value))
    for col_var in out_cols:
        array_dists[col_var.name] = out_dist

    # output can cause input REP: propagate a downgraded output back
    if out_dist != Distribution.OneD_Var:
        for col_var in in_cols:
            array_dists[col_var.name] = out_dist
コード例 #6
0
ファイル: hiframes_sort.py プロジェクト: raonyguimaraes/hpat
def sort_distributed_analysis(sort_node, array_dists):
    """Give a sort node's key array and data columns one shared distribution.

    The shared distribution is the one with the smallest enum value among the
    key array and all columns; it is written back into ``array_dists`` in
    place for each of them.

    Args:
        sort_node: node exposing ``key_arr`` (a variable with a ``name``) and
            ``df_vars`` (mapping whose values are such variables).
        array_dists: mapping from variable name to ``Distribution``. The key
            array and every column need an entry.

    Returns:
        None. ``array_dists`` is updated in place.
    """
    key_name = sort_node.key_arr.name
    col_names = [var.name for var in sort_node.df_vars.values()]

    # start from the key's distribution; with no data columns it is kept as is
    in_dist = array_dists[key_name]
    if col_names:
        in_dist = Distribution(min(
            [in_dist.value]
            + [array_dists[name].value for name in col_names]))

    # columns first, then the key
    for name in col_names + [key_name]:
        array_dists[name] = in_dist