def get_report(loop_op: tf.Operation,
               infeed_queue_initializer: tf.Operation,
               outfeed_op: tf.Operation,
               report_dest: str,
               available_memory_proportion: float = 0.6,
               report_mode: str = 'text') -> None:
    """Generate report from running model on IPU and save to disk.

    Args:
        loop_op: Inference op to generate report on.
        infeed_queue_initializer: Initializer for the infeed queue
        outfeed_op: Outfeed operator.
        report_dest: Location to store report.
        available_memory_proportion: Proportion of tile memory available as temporary
            memory for matmul and convolution execution.
        report_mode: 'text' writes a Poplar text report; any other value saves the
            raw trace with save_tf_report.

    """
    # Set compile and device options
    os.environ["TF_POPLAR_FLAGS"] += " --use_ipu_model"
    use_poplar_text_report = report_mode == 'text'
    opts = ipu_utils.create_ipu_config(
        profiling=True,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=True)
    opts = ipu_utils.set_matmul_options(opts,
                                        matmul_options={
                                            "availableMemoryProportion":
                                            str(available_memory_proportion)
                                        })
    opts = ipu_utils.set_convolution_options(
        opts,
        convolution_options={
            "availableMemoryProportion": str(available_memory_proportion)
        })
    ipu_utils.auto_select_ipus(opts, [1])
    ipu_utils.configure_ipu_system(opts)

    with tf.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    with tf.Session() as session:
        session.run(infeed_queue_initializer)
        session.run(loop_op, options=run_options)
        session.run(outfeed_op, options=run_options)
        out = session.run(report)
    if report_mode == 'text':
        # extract the report
        rep = ipu_utils.extract_all_strings_from_event_trace(out)
        logging.info("Writing profiling report to %s" % report_dest)
        with open(report_dest, "w") as f:
            f.write(rep)
    else:
        save_tf_report(out)
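
# Usage sketch (not part of the original example): wire up a minimal IPU inference
# loop and hand its ops to get_report. The dataset, body function and feed names
# below are illustrative assumptions, not the original model.
import numpy as np
import tensorflow as tf
from tensorflow.python.ipu import ipu_compiler, ipu_infeed_queue, ipu_outfeed_queue, loops
from tensorflow.python.ipu.scopes import ipu_scope

dataset = tf.data.Dataset.from_tensors(np.zeros((4, 8), dtype=np.float32)).repeat()
infeed = ipu_infeed_queue.IPUInfeedQueue(dataset, feed_name="report_infeed")
outfeed = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="report_outfeed")

def inference_body(x):
    # A variable-free matmul stands in for the real inference step.
    return outfeed.enqueue(tf.matmul(x, tf.ones([8, 2], dtype=tf.float32)))

with ipu_scope("/device:IPU:0"):
    loop_op = ipu_compiler.compile(
        lambda: loops.repeat(8, inference_body, infeed_queue=infeed))

get_report(loop_op, infeed.initializer, outfeed.dequeue(), "inference_report.txt")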
Example #2
def generate_report(graph):
    print(f'Generating training report... {graph.report}')
    report = graph.session.run(graph.report)
    compilation_report = ipu_utils.extract_compile_reports(report)
    execution_report = ipu_utils.extract_execute_reports(report)

    with open("report.txt", "w") as f:
        f.write(ipu_utils.extract_all_strings_from_event_trace(report))
    with open("compilation_report.json", "w") as f:
        json.dump(compilation_report, f)
    with open("execution_report.json", "w") as f:
        json.dump(execution_report, f)
    print('Reports saved to the current directory.')
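
# Illustrative caller (an assumption, not shown in the original): generate_report
# only needs an object exposing a live tf.Session and the host-side IPU event-trace
# op; `tf` and the IPU system configuration are assumed to be set up as above.
from types import SimpleNamespace
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops

with tf.device('cpu'):
    trace_op = gen_ipu_ops.ipu_event_trace()

graph = SimpleNamespace(session=tf.Session(), report=trace_op)
generate_report(graph)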
Example #3
def generate_report(batch_size: int,
                    report_dest: str = "./densenet_report.txt") -> None:
    """Generate report from running model on IPU

    Args:
        batch_size: Batch size for inference
        report_dest: Location to save generated text report

    """
    # Set compile and device options
    os.environ['TF_POPLAR_FORCE_IPU_MODEL'] = "1"
    opts = utils.create_ipu_config(profiling=True, use_poplar_text_report=True)
    utils.auto_select_ipus(opts, [1])
    utils.configure_ipu_system(opts)
    output_probs = construct_graph(batch_size)

    with tf.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    with tf.Session() as session:
        session.run(output_probs,
                    feed_dict={
                        "optimized/image_input:0":
                        np.zeros(
                            (batch_size, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
                            dtype=np.float16)
                    },
                    options=run_options)
        out = session.run(report)

    # extract the report
    rep = utils.extract_all_strings_from_event_trace(out)
    logging.info("Writing densenet profiling report to %s" % report_dest)
    with open(report_dest, "w") as f:
        f.write(rep)
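
# Example invocation (an assumed entry point, not part of the original snippet):
# profile a batch-size-1 forward pass and write the text report next to the script.
if __name__ == "__main__":
    generate_report(batch_size=1, report_dest="./densenet_report.txt")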
Example #4
        # Run the main graph
        session.run(logits, feed_dict={x: training_data})
        # Execute the event trace op: the result is a list of trace event serialized protobufs.
        raw_report = session.run(trace)
        # These objects can be converted to strings with utility functions, as shown below.
        ext = ".json" if args.json_report else ".txt"
        if args.split_reports:
            compile_reports = utils.extract_compile_reports(raw_report)
            execution_reports = utils.extract_execute_reports(raw_report)
            # These are lists with one entry per profiled graph; the execution_reports
            # list will be empty if execution profiling is not enabled.
            # You could save only the last entry (i.e. the one for the main graph); here we save everything.
            with open("compile" + ext, "w", encoding="utf-8") as f:
                for report in compile_reports:
                    # Each element of the list is a 2-tuple:
                    # the first item is the auto-generated name of the XLA graph,
                    # the second is the actual report text for that graph.
                    xla_name, report_string = report
                    f.write(xla_name + "\n")
                    f.write(report_string + "\n")
            if len(execution_reports) > 0:
                with open("execution" + ext, "w", encoding="utf-8") as f:
                    for report in execution_reports:
                        xla_name, report_string = report
                        f.write(xla_name + "\n")
                        f.write(report_string + "\n")
        else:
            report = utils.extract_all_strings_from_event_trace(raw_report)
            with open("report" + ext, "w", encoding="utf-8") as f:
                f.write(report)
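
# Assumed setup for the fragment above (not shown in the excerpt): `trace` is the
# host-side IPU event-trace op, created before the main graph is run, and
# `args.json_report` / `args.split_reports` are argparse flags.
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops

with tf.device('cpu'):
    trace = gen_ipu_ops.ipu_event_trace()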
Example #5
 start = time.time()
 sess.run(inference_output)
 convolution_predictions = sess.run(outfeed)
 # convolution_predictions = sess.run(inference_output, feed_dict={input_image: np_image})
 raw_output = sess.run(
     decoder, feed_dict={input_detection: convolution_predictions[0]})
 filtered_output = process_detections(raw_output)
 draw_detections(original_image, original_image_dims[0],
                 original_image_dims[1], filtered_output)
 print("Done running inference.")
 duration = time.time() - start
 print("Duration: {:.3f} seconds\n".format(duration))
 if REPORT:
     rep_out = sess.run(report)
     save_tf_report(rep_out)
     rep = utils.extract_all_strings_from_event_trace(rep_out)
     with open(
             str(WIDTH) + "x" + str(HEIGHT) + "_ipus" + str(NUM_IPUS) +
             "_ssd_report.txt", "w") as f:
         f.write(rep)
 # Performance runs
 print("Executing...")
 for iter_count in range(N_ITERATIONS):
     print("Running iteration: ", iter_count)
     # Run
     start = time.time()
     sess.run(inference_output)
     convolution_predictions = sess.run(outfeed)
     raw_output = sess.run(
         decoder, feed_dict={input_detection: convolution_predictions[0]})
     filtered_output = process_detections(raw_output)
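
# Assumed definitions used in the fragment above (not shown in the excerpt):
# `outfeed` is the dequeue op of an IPUOutfeedQueue and `report` is the host-side
# event-trace op; the feed name is illustrative.
from tensorflow.python.ipu import ipu_outfeed_queue
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops

outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="ssd_outfeed")
outfeed = outfeed_queue.dequeue()
with tf.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()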
Example #6
def train(replication_factor, batch_size, batch_per_step, profile, num_iter,
          time_steps):
    """Launch training."""

    # Set up in-feeds for the data
    with tf.device('cpu'):
        data_generator = EnvGenerator(batch_size, time_steps)
        items = next(data_generator)
        output_types = tuple((tf.dtypes.as_dtype(i.dtype) for i in items))
        output_shapes = tuple((tf.TensorShape(i.shape) for i in items))
        total_bytes = 0
        for i in items:
            total_bytes += i.nbytes
        print(f'Input data size = {total_bytes/1000000} MB/batch')
        dataset = tf.data.Dataset.from_generator(data_generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            dataset, "InfeedQueue", replication_factor=replication_factor)
        data_init = infeed_queue.initializer

    # Compile loss op
    with ipu_scope("/device:IPU:0"):
        total_loss = ipu_compiler.compile(
            lambda: loops.repeat(batch_per_step,
                                 build_train_op,
                                 infeed_queue=infeed_queue,
                                 inputs=[tf.constant(0.0, dtype=DTYPE)]))
    # Set up report op optionally.
    if profile:
        with tf.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

    # Set up session on IPU
    opts = utils.create_ipu_config(
        profiling=profile,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=profile,
        merge_infeed_io_copies=True)
    opts = utils.set_optimization_options(
        opts, max_cross_replica_sum_buffer_size=10000000)
    opts = utils.auto_select_ipus(opts, [replication_factor])
    utils.configure_ipu_system(opts)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=True))

    # Initialize variables
    utils.move_variable_initialization_to_cpu()
    sess.run([tf.global_variables_initializer(), data_init])

    # Run training and time
    total_time = 0.0
    total_samples = 0
    # The infeed may initially buffer extra input data and the first IPU run
    # includes XLA compilation, so skip the first few iterations when
    # computing items/sec.
    skip_iterations = 5
    for iters in range(num_iter):
        data_generator.reset_counter()
        t0 = time.perf_counter()
        sess.run(total_loss)
        t1 = time.perf_counter()

        if profile:
            raw_reports = sess.run(report)
            if use_poplar_text_report:
                # extract the report
                rep = utils.extract_all_strings_from_event_trace(raw_reports)
                print("Writing profiling report to %s" % report_dest)
                with open(report_dest, "w") as f:
                    f.write(rep)
            else:
                os.makedirs('profile_rl', exist_ok=True)
                save_tf_report(raw_reports, log_dir='profile_rl')
                print("Writing profiling report to profile_rl")
            break

        if iters > skip_iterations:
            total_time += (t1 - t0)
            total_samples += (batch_size * batch_per_step * replication_factor)
            print("Average %.1f items/sec" % (total_samples / total_time))
Example #7
def extract_runtimes_from_report(report, display=True):
    """Returns timing information from IpuTraceEvent

    report -- Array of text encoded IpuTraceEvent

    """
    if len(report) == 0:
        return

    # Timings from tf xla event timestamps
    from tensorflow.compiler.plugin.poplar.driver.trace_pb2 import IpuTraceEvent

    # Retrieve IpuEvents, poplar report and cycles
    events = list(map(IpuTraceEvent.FromString, report))
    report = utils.extract_all_strings_from_event_trace(report)
    m = list(map(int, re.findall(r"Program cycles\s*:\s*([\d\.]+)", report)))

    global start_time
    first = start_time == 0
    if first:
        start_time = events[0].timestamp
        events = events[1:]
    evt_str = "\nIPU Timings\n"
    exec_num = 0

    for evt in events:
        extra_str = ""
        if evt.type == IpuTraceEvent.COMPILE_BEGIN:
            continue
        elif evt.type == IpuTraceEvent.COMPILE_END:
            evt_name = "Compile"
        elif evt.type == IpuTraceEvent.HOST_TO_DEVICE_TRANSFER:
            evt_name = "Host->Device"
            extra_str = "\n  Tensors:"
            transfered_tensors = json.loads(
                evt.data_transfer.data_transfer.decode("utf-8"))
            for t in transfered_tensors["tensors"]:
                extra_str += "\n    handle: {:>6}, size: {}".format(
                    t["name"], t["size"])
            extra_str += "\n  Total_size: {}".format(
                transfered_tensors["total_size"])
        elif evt.type == IpuTraceEvent.DEVICE_TO_HOST_TRANSFER:
            evt_name = "Device->Host"
            extra_str = "\n  Tensors:"
            transfered_tensors = json.loads(
                evt.data_transfer.data_transfer.decode("utf-8"))
            for t in transfered_tensors["tensors"]:
                extra_str += "\n    handle: {:>6}, size: {}".format(
                    t["name"], t["size"])
            extra_str += "\n  Total_size: {}".format(
                transfered_tensors["total_size"])
        elif evt.type == IpuTraceEvent.LOAD_ENGINE:
            evt_name = "Load engine"
        elif evt.type == IpuTraceEvent.EXECUTE:
            evt_name = "Execute"

            if m and m[exec_num]:
                execution_time = float(m[exec_num]) / (
                    1 * 1000 * 1000 * 1000)  # Implied 1GHz clock speed
                extra_str = "\n  Execution Time: {:.3g}s".format(
                    execution_time)
                extra_str += "\n  Streaming Time: {:.3g}s".format(
                    (evt.timestamp - start_time) - execution_time)
                exec_num += 1
        else:
            evt_name = "Unknown event"
        evt_str += "{:<15s}: {:<8.3g} s   {}\n".format(
            evt_name, (evt.timestamp - start_time), extra_str)
        start_time = evt.timestamp

    # Print Cycle count from poplar report
    evt_str += "\nCycle counts on IPU\n"
    for execution_num, execution_cycles in enumerate(m):
        evt_str += "Execution {} cycles : {}\n".format(execution_num,
                                                       execution_cycles)
    if display:
        print(evt_str)
    # Write Report to file
    if first:
        with open("report.txt", "w") as f:
            f.write(report)
        print("\nWritten to file: report.txt")
Example #8
def basic_graph(pa, pb, pc):
    # Do basic addition with tensors
    o1 = pa + pb
    o2 = pa + pc
    simple_graph_output = o1 + o2
    return simple_graph_output


with ipu_scope("/device:IPU:0"):
    result = basic_graph(pa, pb, pc)

with tf.Session() as sess:
    # Run the graph through the session feeding it an arbitrary dictionary
    result = sess.run(result,
                      feed_dict={
                          pa: [1., 1.],
                          pb: [0., 1.],
                          pc: [1., 5.]
                      })

    # Generate a report from the events recorded during the session run
    trace_out = sess.run(report)
    trace_report = utils.extract_all_strings_from_event_trace(trace_out)

    # Write trace report to file
    with open('Trace_Event_Report.rep', "w") as f:
        f.write(trace_report)

    # Print the result
    print(result)
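
# Assumed setup for the snippet above (not shown in the excerpt): the three inputs
# are host placeholders and `report` is the host-side IPU event-trace op; the IPU
# system is assumed to be configured elsewhere.
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops

pa = tf.placeholder(tf.float32, [2], name="a")
pb = tf.placeholder(tf.float32, [2], name="b")
pc = tf.placeholder(tf.float32, [2], name="c")

with tf.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()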