Example 1
def get_node_by_treewidth_reduction(graph):
    """
    Computes, for every node, how much the treewidth drops when that
    node is removed from the graph. The graph is **ASSUMED** to be in
    the optimal elimination order, e.g. the nodes have to be
    relabelled by peo

    Parameters
    ----------
    graph : networkx.Graph without self-loops and parallel edges

    Returns
    -------
    nodes_by_treewidth_reduction : list of (node, treewidth_delta) pairs
    """
    # Treewidth of the full graph under the identity (sorted) order
    baseline_treewidth = get_treewidth_from_peo(
        graph, sorted(graph.nodes))

    nodes_by_treewidth_reduction = []
    for candidate in graph.nodes(data=False):
        # Work on a private copy so the input graph is never mutated
        trial_graph = copy.deepcopy(graph)
        remove_node(trial_graph, candidate)

        reduced_treewidth = get_treewidth_from_peo(
            trial_graph, sorted(trial_graph.nodes))
        nodes_by_treewidth_reduction.append(
            (candidate, baseline_treewidth - reduced_treewidth))

    return nodes_by_treewidth_reduction
Example 2
def get_node_by_mem_reduction(old_graph):
    """
    Returns a list of pairs (node, reduction_in_memory_cost) for the
    provided graph. The graph is **ASSUMED** to be in the optimal
    elimination order, e.g. the nodes have to be relabelled by
    peo

    Parameters
    ----------
    old_graph : networkx.Graph without self-loops and parallel edges

    Returns
    -------
    nodes_by_mem_reduction : list of (node, mem_delta) pairs
    """
    graph = copy.deepcopy(old_graph)

    # Get memory cost of the bucket elimination; the flop cost is
    # not needed for this metric.
    initial_mem, _ = get_contraction_costs(graph)
    # Hoisted out of the loop: the baseline total does not change
    initial_total_mem = sum(initial_mem)

    nodes_by_mem_reduction = []
    for node in graph.nodes(data=False):
        reduced_graph = copy.deepcopy(graph)
        # Take out one node
        remove_node(reduced_graph, node)
        mem, _ = get_contraction_costs(reduced_graph)
        delta = initial_total_mem - sum(mem)

        nodes_by_mem_reduction.append((node, delta))

    return nodes_by_mem_reduction
Example 3
def split_graph_random(old_graph, n_var_parallel=0):
    """
    Splits a graphical model with randomly chosen nodes
    to parallelize over.

    Parameters
    ----------
    old_graph : networkx.Graph
                graph to contract (after eliminating variables which
                are parallelized over)
    n_var_parallel : int
                number of variables to eliminate by parallelization

    Returns
    -------
    idx_parallel : list of Idx
          variables removed by parallelization
    graph : networkx.Graph
          new graph without parallelized variables
    """
    graph = copy.deepcopy(old_graph)

    indices = [var for var in graph.nodes(data=False)]
    idx_parallel = np.random.choice(
        indices, size=n_var_parallel, replace=False)

    # FIX: read the 'size' attribute instead of passing the whole node
    # attribute dict, consistent with the other split functions here.
    idx_parallel_var = [Var(var, size=graph.nodes[var]['size'])
                        for var in idx_parallel]

    for idx in idx_parallel:
        remove_node(graph, idx)

    log.info("Removed indices by parallelization:\n{}".format(idx_parallel))
    log.info("Removed {} variables".format(len(idx_parallel)))
    # NOTE: the original called get_peo(graph) here and discarded the
    # result — an expensive dead computation, removed.

    return sorted(idx_parallel_var, key=int), graph
Example 4
def split_graph_by_metric_greedy(
        old_graph, n_var_parallel=0,
        metric_fn=get_node_by_treewidth_reduction,
        greedy_step_by=1, forbidden_nodes=(), peo_function=get_peo):
    """
    This function splits graph by greedily selecting next nodes
    up to the n_var_parallel
    using the metric function and recomputing PEO after
    each node elimination

    Parameters
    ----------
    old_graph : networkx.Graph or networkx.MultiGraph
                graph to split by parallelizing over variables
                and to contract

                Parallel edges and self-loops in the graph are
                removed (if any) before the calculation of metric

    n_var_parallel : int
                number of variables to eliminate by parallelization
    metric_fn : function, optional
                function to evaluate node metric.
                Default get_node_by_treewidth_reduction
    greedy_step_by : int, default 1
                Step size for the greedy algorithm

    forbidden_nodes : iterable, optional
                nodes in this list will not be considered
                for deletion. Default ().
    peo_function: function
           function to calculate PEO. Should have signature
           lambda (graph): return peo, treewidth

    Returns
    -------
    idx_parallel : list
          variables removed by parallelization
    graph : networkx.Graph
          new graph without parallelized variables
    """
    # convert everything to int
    forbidden_nodes = [int(var) for var in forbidden_nodes]

    # Simplify graph
    graph = get_simple_graph(old_graph)

    idx_parallel = []
    idx_parallel_var = []

    # Full-size batches, plus the remainder batch only when nonzero.
    # (The original appended a zero-length step when n_var_parallel
    # divides evenly, forcing one useless PEO recomputation.)
    steps = [greedy_step_by] * (n_var_parallel // greedy_step_by)
    remainder = n_var_parallel % greedy_step_by
    if remainder:
        steps.append(remainder)

    for n_parallel in steps:
        # Get optimal order - recalculate treewidth
        peo, tw = peo_function(graph)
        graph_optimal, inverse_order = relabel_graph_nodes(
            graph, dict(zip(peo, sorted(graph.nodes))))

        # get nodes by metric in descending order
        nodes_by_metric_optimal = metric_fn(graph_optimal)
        nodes_by_metric_optimal.sort(
            key=lambda pair: pair[1], reverse=True)

        # filter out forbidden nodes and map back to original labels
        nodes_by_metric_allowed = []
        for node, metric in nodes_by_metric_optimal:
            if inverse_order[node] not in forbidden_nodes:
                nodes_by_metric_allowed.append(
                    (inverse_order[node], metric))

        # Take the top-metric nodes for this batch; metric values
        # themselves are not needed past this point
        nodes = [node for node, _ in nodes_by_metric_allowed[:n_parallel]]

        # Update list and update graph
        idx_parallel += nodes
        # create var objects from nodes
        idx_parallel_var += [Var(var, size=graph.nodes[var]['size'])
                             for var in nodes]
        for node in nodes:
            remove_node(graph, node)

    return idx_parallel_var, graph
Example 5
def split_graph_with_mem_constraint_greedy(
        old_graph,
        n_var_parallel_min=0,
        mem_constraint=defs.MAXIMAL_MEMORY,
        step_by=5,
        n_var_parallel_max=None,
        metric_fn=get_node_by_mem_reduction,
        forbidden_nodes=(),
        peo_function=get_peo):
    """
    This function splits graph by greedily selecting next nodes
    up to the n_var_parallel
    using the metric function and recomputing PEO after
    each node elimination. The graph is **ASSUMED** to be in
    the perfect elimination order

    Parameters
    ----------
    old_graph : networkx.Graph()
           initial contraction graph
    n_var_parallel_min : int
           minimal number of variables to split the task to
    mem_constraint : int
           Upper limit on memory per task
    metric_fn : function, optional
           function to rank nodes for elimination
    step_by : int, optional
           scan the metric function with this step
    n_var_parallel_max : int, optional
           constraint on the maximal number of parallelized
           variables. Default None
    forbidden_nodes: iterable, default ()
           nodes forbidden for parallelization
    peo_function: function
           function to calculate PEO. Should have signature
           lambda (graph): return peo, treewidth

    Raises
    ------
    ValueError
           if the memory constraint cannot be met

    Returns
    -------
    idx_parallel : list
             list of removed variables
    graph : networkx.Graph
             reduced contraction graph
    """
    # convert everything to int
    forbidden_nodes = [int(var) for var in forbidden_nodes]

    graph = copy.deepcopy(old_graph)
    n_var_total = old_graph.number_of_nodes()
    if n_var_parallel_max is None:
        n_var_parallel_max = n_var_total

    # Memory cost of contracting the unsplit graph; flop cost unused
    mem_cost, _ = get_contraction_costs(graph)
    max_mem = sum(mem_cost)

    idx_parallel = []
    idx_parallel_var = []

    # Full-size batches plus the nonzero remainder, if any.
    # (The original built `steps` twice — the first range-based
    # computation was dead code — and could append a zero step.)
    steps = [step_by] * (n_var_parallel_max // step_by)
    remainder = n_var_parallel_max % step_by
    if remainder:
        steps.append(remainder)

    for n_parallel in steps:
        # Get optimal order
        peo, tw = peo_function(graph)
        graph_optimal, inverse_order = relabel_graph_nodes(
            graph, dict(zip(peo, range(len(peo)))))

        # get nodes by metric in descending order
        nodes_by_metric_optimal = metric_fn(graph_optimal)
        nodes_by_metric_optimal.sort(
            key=lambda pair: pair[1], reverse=True)

        # filter out forbidden nodes and map back to original labels
        nodes_by_metric_allowed = []
        for node, metric in nodes_by_metric_optimal:
            if inverse_order[node] not in forbidden_nodes:
                nodes_by_metric_allowed.append(
                    (inverse_order[node], metric))

        # Take the top-metric nodes for this batch; metric values are
        # no longer needed after selection
        nodes = [node for node, _ in nodes_by_metric_allowed[:n_parallel]]

        # Update list and update graph
        idx_parallel += nodes

        # create var objects from nodes
        idx_parallel_var += [Var(var, size=graph.nodes[var]['size'])
                             for var in nodes]

        for node in nodes:
            remove_node(graph, node)

        # Renumerate graph nodes to be consecutive ints (may be redundant)
        label_dict = dict(zip(sorted(graph.nodes),
                              range(len(graph.nodes()))))

        graph_relabelled, _ = relabel_graph_nodes(graph, label_dict)
        mem_cost, _ = get_contraction_costs(graph_relabelled)

        max_mem = sum(mem_cost)

        if (max_mem <= mem_constraint
           and len(idx_parallel) >= n_var_parallel_min):
            break

    if max_mem > mem_constraint:
        raise ValueError('Maximal memory constraint is not met')

    return idx_parallel_var, graph
Example 6
def split_graph_by_metric(
        old_graph, n_var_parallel=0,
        metric_fn=get_node_by_degree,
        forbidden_nodes=()):
    """
    Parallel-splitted version of :py:meth:`get_peo` with nodes
    to split chosen according to the metric function. Metric
    function should take a graph and return a list of pairs
    (node : metric_value)

    Parameters
    ----------
    old_graph : networkx.Graph or networkx.MultiGraph
                graph to split by parallelizing over variables
                and to contract

                Parallel edges and self-loops in the graph are
                removed (if any) before the calculation of metric

    n_var_parallel : int
                number of variables to eliminate by parallelization
    metric_fn : function, optional
                function to evaluate node metric.
                Default get_node_by_degree
    forbidden_nodes : iterable, optional
                nodes in this list will not be considered
                for deletion. Default ().
    Returns
    -------
    idx_parallel : list
          variables removed by parallelization
    graph : networkx.Graph
          new graph without parallelized variables
    """
    graph = copy.deepcopy(old_graph)

    # convert everything to int
    forbidden_nodes = [int(var) for var in forbidden_nodes]

    # Rank all nodes by the metric, best candidates first
    ranked_nodes = metric_fn(graph)
    ranked_nodes.sort(key=lambda pair: int(pair[1]), reverse=True)

    # Drop nodes the caller excluded from deletion
    allowed_nodes = [(node, metric) for node, metric in ranked_nodes
                     if node not in forbidden_nodes]

    # Pick the top n_var_parallel candidates; indexing (rather than
    # slicing) raises IndexError if there are too few allowed nodes
    idx_parallel = [allowed_nodes[ii][0] for ii in range(n_var_parallel)]

    # create var objects from nodes
    idx_parallel_var = [Var(var, size=graph.nodes[var]['size'])
                        for var in idx_parallel]

    for idx in idx_parallel:
        remove_node(graph, idx)

    log.info("Removed indices by parallelization:\n{}".format(idx_parallel))
    log.info("Removed {} variables".format(len(idx_parallel)))

    return idx_parallel_var, graph