def graph_latency(graph: Graph, batch_size=1, warmup=2, number=6, repeat=6, profile_stage=False):
    if lib is None:
        raise FileNotFoundError('IOS Backend library not found')
    num_stages = sum(len(b.stages) for b in graph.blocks)
    # Buffers the backend fills in: one latency per repeat, and optionally one per (repeat, stage) pair.
    results_t = ctypes.c_float * int(repeat)
    results = results_t()
    stage_results_t = ctypes.c_float * int(repeat * num_stages)
    stage_results = stage_results_t()
    for i in range(repeat * num_stages):
        stage_results[i] = 0.0
    lib.graph_latency(
        c_string(json.dumps(graph.export_config())),
        ctypes.c_int(batch_size),
        ctypes.c_int(warmup),
        ctypes.c_int(number),
        ctypes.c_int(repeat),
        ctypes.c_int(1 if profile_stage else 0),
        ctypes.cast(results, ctypes.POINTER(ctypes.c_float)),
        ctypes.cast(stage_results, ctypes.POINTER(ctypes.c_float)))
    if profile_stage:
        return [float(v) for v in results], [float(v) for v in stage_results]
    else:
        return [float(v) for v in results]
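
# A minimal usage sketch of graph_latency, kept as an uncalled helper so importing this module has
# no side effects. The schedule path below is hypothetical; loading a Graph from a JSON config
# mirrors how main() in this repository does it.
def _example_graph_latency():
    with open('schedules/example.json', 'r') as f:  # hypothetical schedule config
        g = Graph.from_config(json.load(f))
    per_repeat = graph_latency(g, batch_size=1, warmup=2, number=6, repeat=6)
    print('mean latency over repeats:', sum(per_repeat) / len(per_repeat))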
def graph_inference(graph: Graph, batch_size, input: np.ndarray):
    # Allocate the output buffer; its shape is determined by the exit node of the last block.
    output = np.empty(shape=(batch_size, *graph.blocks[-1].exit_node.output_shape), dtype=np.float32)
    conv_nodes: List[Conv] = list(get_nodes_by_type(graph.nodes(), Conv))
    output_data = output.ctypes.data_as(ctypes.c_void_p)
    lib.graph_inference(
        c_string(json.dumps(graph.export_config())),
        ctypes.c_int(batch_size),
        input.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(len(conv_nodes)),
        c_string_list([node.name for node in conv_nodes]),
        c_ndarray_list([node.weight for node in conv_nodes]),
        c_ndarray_list([node.bias for node in conv_nodes]),
        output_data)
    return output
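
# A minimal usage sketch of graph_inference, also uncalled. It assumes the network input shape is
# (batch_size, *graph.input.output_shape), mirroring how the output shape above is taken from the
# exit node of the last block; the Conv nodes are expected to carry real weights and biases.
def _example_graph_inference(graph: Graph, batch_size: int = 1) -> np.ndarray:
    x = np.random.uniform(size=(batch_size, *graph.input.output_shape)).astype(np.float32)
    return graph_inference(graph, batch_size, x)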
def main():
    logs = {}
    with open(f'schedules/{args.graph}.json', 'r') as f:
        graph = Graph.from_config(json.load(f))
    cost_model = IOSCostModel()
    name = graph.name
    graph_latency = cost_model.get_graph_latency(graph, args.bs, warmup=args.warmup, number=args.number, repeat=args.repeat)
    block_latency = [
        np.mean(cost_model.get_block_latency(block, args.bs, args.warmup, args.number, args.repeat))
        for block in graph.blocks
    ]
    logs[name] = {}
    logs[name]['latency'] = graph_latency
    logs[name]['mean'] = float(np.mean(graph_latency))
    logs[name]['std'] = float(np.std(graph_latency))
    logs[name]['block_latency'] = block_latency
    summary = summary_str(np.mean(graph_latency))
    print(summary)
    for bindex, block in enumerate(graph.blocks):
        block_dir = f'{expr_dir}/{name}_blocks'
        os.makedirs(block_dir, exist_ok=True)
        draw_block(block, f'{block_dir}/{bindex}.png',
                   f'{name} block {bindex}, latency {block_latency[bindex]:.3f}')
    draw(graph, f"{expr_dir}/{name}.png", label=f'{name}, latency {float(np.mean(graph_latency)):.3f}')
    with open(f"{expr_dir}/{name}.json", "w") as f:
        json.dump(graph.export_config(), f, indent=2)
    with open(f'{expr_dir}/latency.json', 'w') as f:
        json.dump(logs, f, indent=2)
    with open(f'{expr_dir}/summary.txt', 'w') as f:
        f.write(summary + "\n")
    with open(f'{expr_dir}/arguments.txt', 'w') as f:
        json.dump(args.__dict__, f, indent=2)
def optimize(graph: Graph,
             batch_size=1,
             cost_model: Optional[CostModel] = None,
             opt_type: str = 'dp_parallel_merge',
             warmup=2, number=6, repeat=6,
             max_num_groups=8,
             max_part_size=100,
             compute_weight=False,
             max_group_size=3,
             debug_dp_info=None,
             verbose=False) -> Graph:
    """
    Optimize the computation graph and generate a highly optimized schedule.

    :param graph: Graph
        The computation graph to be optimized.
    :param batch_size: int
        The batch size that IOS optimizes for.
    :param cost_model: ios.cost_model.CostModel
        The cost model used to measure the latency of stages. It can be either ios.cost_model.IOSCostModel()
        or ios.cost_model.RandomCostModel(); the latter can be used to generate a random schedule. The
        default cost model is ios.cost_model.IOSCostModel().
    :param opt_type: str, one of 'dp_parallel_merge', 'dp_parallel', and 'dp_merge'
        The optimization type, which specifies the parallelization strategies that can be used in the
        optimization. There are two parallelization strategies for each stage: 'operator merge' and
        'concurrent execution'. When opt_type='dp_parallel', only 'concurrent execution' is used. When
        opt_type='dp_merge', only 'operator merge' is used. When opt_type='dp_parallel_merge', both
        strategies are used.
    :param warmup: int, default 2
        The number of warm-up executions in a stage latency measurement.
    :param number: int, default 6
        The number of executions in one repeat during the latency measurement.
    :param repeat: int, default 6
        The number of repeats during the latency measurement. In total, a stage is executed
        warmup + number * repeat times in a stage latency measurement. The average latency is used to
        estimate the stage latency.
    :param max_num_groups: int, default 8
        The maximum number of parallel groups in a stage.
    :param max_part_size: int, default 100
        When the number of operators in a block is larger than max_part_size, the block is split into
        multiple parts, each containing at most max_part_size operators.
    :param compute_weight: boolean, default False
        Compute the weights for the optimized computation graph. This is not necessary when we only want
        to measure the latency of the optimized computation graph. However, when you want to execute the
        optimized computation graph with real weights and data, set it to True.
    :param max_group_size: int, default 3
        The maximum size of a group in each stage.
    :param debug_dp_info: None or a dict, default None
        Collect debug information of the optimization. The debug information is stored in the dict when it
        is not None.
    :param verbose: boolean, default False
        Print verbose information during optimization.
    :return: Graph
        The optimized computation graph and execution schedule.
""" if cost_model is None: cost_model = IOSCostModel() graph_enter = Placeholder(graph.input.name, graph.input.hint_name, graph.input.output_shape) graph_enter.output_shape = graph.enter_node.output_shape blocks = [] on_debug = debug_dp_info is not None if on_debug: assert isinstance(debug_dp_info, dict) debug_dp_info['dp'] = {} debug_dp_info['stage_latency'] = {} debug_dp_info['#states'] = [] debug_dp_info['#transitions'] = [] debug_dp_info['#schedules'] = [] debug_dp_info['width'] = [] debug_dp_info['#operators'] = [] debug_dp_info['meta'] = [] log(f"optimize {opt_type} on {graph.name}", verbose) assert 'parallel' in opt_type or 'merge' in opt_type for bindex, block in enumerate(graph.blocks): all_nodes = block.inner_nodes + [block.exit_node] node_parts = block.parts nid: Dict[Node, int] = {node: i for i, node in enumerate(all_nodes)} idn: Dict[int, Node] = {i: node for i, node in enumerate(all_nodes)} if node_parts is None: node_parts = [] for idx_part in range( (len(all_nodes) + max_part_size - 1) // max_part_size): begin = idx_part * max_part_size end = min((idx_part + 1) * max_part_size, len(all_nodes)) node_parts.append([all_nodes[i] for i in range(begin, end)]) log( f"block {bindex} with {len(all_nodes)} nodes {len(node_parts)} parts", verbose) stage_list = [] for part_index, npart in enumerate(node_parts): log(f"part {part_index} with {len(npart)} nodes", verbose) ipart = [nid[nd] for nd in npart] dp: Dict[int, float] = {} ep: Dict[int, Tuple[List[int], str]] = {} merge_latency: Dict[int, float] = {} parallel_latency: Dict[int, float] = {} part_graph = build_graph(npart, nid) chains = graph_chain_decomposition(part_graph) max_num_endings = functools.reduce( operator.mul, [len(chain) + 1 for chain in chains]) if verbose: print(f"#Chains: {len(chains)}") print(f"Max number of endings: {max_num_endings}") if on_debug: debug_dp_info['#states'].append(0) debug_dp_info['#transitions'].append(0) debug_dp_info['#schedules'].append(0) debug_dp_info['width'].append(0) debug_dp_info['#operators'].append(len(npart)) debug_dp_info['meta'].append({0: 1}) ustate = sum(1 << i for i in ipart) dop(ustate, block, chains, on_debug, debug_dp_info, idn, nid, dp, ep, opt_type, max_group_size, max_num_groups, merge_latency, parallel_latency, cost_model, batch_size, warmup, number, repeat) stage_list.extend(get_stage_list(ep, ustate)) if on_debug: for ss in parallel_latency: stage_name = ' '.join( ['parallel'] + [nd.name for nd in state2nset(ss, idn)]) debug_dp_info['stage_latency'][ stage_name] = parallel_latency[ss] for ss in merge_latency: stage_name = ' '.join( ['merge'] + [nd.name for nd in state2nset(ss, idn)]) debug_dp_info['stage_latency'][stage_name] = merge_latency[ ss] for s in dp: state_name = ' '.join(nd.name for nd in state2nset(s, idn)) stage_name = ' '.join(nd.name for nd in state2nset( sum(1 << u for u in itertools.chain(*ep[s][0])), idn)) debug_dp_info['dp'][ state_name] = f'{dp[s]:.3f} ({ep[s][1]} {stage_name})' debug_dp_info['#schedules'][-1] = debug_dp_info['meta'][-1][ ustate] new_block = construct(stage_list, block, blocks, graph_enter, idn, nid, compute_weight) blocks.append(new_block) new_graph = Graph(graph.name + "_" + opt_type, graph_enter, blocks) new_graph.infer_shape() return new_graph