Example #1
    def create_exec_infer_model(self, model_dir, labels, num_requests=2):

        model_xml = os.path.join(model_dir, 'frozen_inference_graph.xml')
        model_bin = os.path.join(model_dir, 'frozen_inference_graph.bin')
        exported_model = os.path.join(model_dir, 'exported_model')

        assert os.path.isfile(model_bin)
        assert os.path.isfile(model_xml)

        ie = IECore()
        net = IENetwork(model=model_xml, weights=model_bin)

        # return ExecInferModel()

        img_info_input_blob = None
        feed_dict = {}
        for blob_name in net.inputs:
            if len(net.inputs[blob_name].shape) == 4:
                input_blob = blob_name
            elif len(net.inputs[blob_name].shape) == 2:
                img_info_input_blob = blob_name
            else:
                raise RuntimeError(
                    "Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                    .format(len(net.inputs[blob_name].shape), blob_name))

        assert len(
            net.outputs) == 1, "Demo supports only single output topologies"
        out_blob = next(iter(net.outputs))

        if os.path.isfile(exported_model):  # found an exported model to import
            print('found model to import')
            exec_net = ie.import_network(model_file=exported_model,
                                         device_name=self.device)
        else:
            print('creating exec model')
            exec_net = ie.load_network(network=net,
                                       num_requests=num_requests,
                                       device_name=self.device)
            exec_net.export(exported_model)

        n, c, h, w = net.inputs[input_blob].shape
        if img_info_input_blob:
            feed_dict[img_info_input_blob] = [h, w, 1]

        del net
        del ie

        return ExecInferModel(exec_net, input_blob, out_blob, feed_dict, n, c,
                              h, w, num_requests, labels)
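
A minimal usage sketch (not part of the original example), assuming ExecInferModel simply stores the constructor arguments above as attributes of the same names, that detector is an instance of the surrounding class, and that 'frame.jpg' stands in for a real input frame:

import cv2

exec_model = detector.create_exec_infer_model('model_dir', labels=['person'], num_requests=2)

# Resize and reorder a BGR frame from HWC to NCHW to match the network input shape
frame = cv2.imread('frame.jpg')
blob = cv2.resize(frame, (exec_model.w, exec_model.h))
blob = blob.transpose((2, 0, 1)).reshape((exec_model.n, exec_model.c, exec_model.h, exec_model.w))

feed = dict(exec_model.feed_dict)
feed[exec_model.input_blob] = blob

# Start an asynchronous request and read the detections once it completes (0 == StatusCode.OK)
exec_model.exec_net.start_async(request_id=0, inputs=feed)
if exec_model.exec_net.requests[0].wait(-1) == 0:
    detections = exec_model.exec_net.requests[0].outputs[exec_model.out_blob]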
Example #2
class Benchmark:
    def __init__(self,
                 device: str,
                 number_infer_requests: int = None,
                 number_iterations: int = None,
                 duration_seconds: int = None,
                 api_type: str = 'async'):
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds,
                                                     self.niter, self.device)
        self.api_type = api_type

    def __del__(self):
        del self.ie

    def add_extension(self,
                      path_to_extension: str = None,
                      path_to_cldnn_config: str = None):
        if path_to_cldnn_config:
            self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config},
                               GPU_DEVICE_NAME)
            logger.info(
                'GPU extensions are loaded: {}'.format(path_to_cldnn_config))

        if path_to_extension:
            self.ie.add_extension(extension_path=path_to_extension,
                                  device_name=CPU_DEVICE_NAME)
            logger.info(
                'CPU extensions are loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format(
            '', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format(
                '', version.description, ' version', version.major,
                version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format(
                '', 'Build', version.build_number)
        return version_string

    def set_config(self, config={}):
        for device in config.keys():
            self.ie.set_config(config[device], device)

    def read_network(self, path_to_model: str):
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(
            head + BIN_EXTENSION) if ext == XML_EXTENSION else ""
        ie_network = self.ie.read_network(model_filename, weights_filename)
        return ie_network

    def load_network(self, ie_network: IENetwork, config={}):
        exe_network = self.ie.load_network(
            ie_network,
            self.device,
            config=config,
            num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)

        return exe_network

    def import_network(self, path_to_file: str, config={}):
        exe_network = self.ie.import_network(
            model_file=path_to_file,
            device_name=self.device,
            config=config,
            num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def first_infer(self, exe_network):
        infer_request = exe_network.requests[0]

        # warm-up inference, not counted in the measured results
        if self.api_type == 'sync':
            infer_request.infer()
        else:
            infer_request.async_infer()
            status = exe_network.wait()
            if status != StatusCode.OK:
                raise Exception(
                    "Waiting for all requests failed with status code {}!".
                    format(status))
        return infer_request.latency

    def infer(self, exe_network, batch_size, progress_bar=None):
        progress_count = 0
        infer_requests = exe_network.requests

        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0

        times = []
        in_fly = set()
        # Start inference & calculate performance
        # align the number of iterations to guarantee that the last infer requests are executed under the same conditions
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception("Wait for idle request failed!")
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception("Invalid request id!")
                if infer_request_id in in_fly:
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                if self.duration_seconds:
                    # calculate how many progress intervals are covered by current iteration.
                    # depends on the current iteration time and time of each progress interval.
                    # Previously covered progress intervals must be skipped.
                    progress_interval_time = self.duration_seconds / progress_bar.total_num
                    new_progress = int(exec_time / progress_interval_time -
                                       progress_count)
                    progress_bar.add_progress(new_progress)
                    progress_count += new_progress
                elif self.niter:
                    progress_bar.add_progress(1)

        # wait for the remaining in-flight inference requests to complete
        status = exe_network.wait()
        if status != StatusCode.OK:
            raise Exception(
                "Waiting for all requests failed with status code {}!".format(
                    status))

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
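
A rough driver sketch (not part of the original), assuming the helpers and constants the class references (get_duration_seconds, XML_EXTENSION, BIN_EXTENSION, logger, StatusCode) are imported from the benchmark_app sources; the model path and batch size are placeholders, and in the real tool the request input blobs are filled before infer() is called:

benchmark = Benchmark(device='CPU', number_infer_requests=4, number_iterations=100, api_type='async')
print(benchmark.get_version_info())

net = benchmark.read_network('model.xml')    # reads model.xml and the matching model.bin
exe_net = benchmark.load_network(net)        # also updates benchmark.nireq to the real request count
benchmark.first_infer(exe_net)               # warm-up request, excluded from the statistics

fps, latency_ms, duration_sec, iterations = benchmark.infer(exe_net, batch_size=1)
print(f'{fps:.2f} FPS, median latency {latency_ms:.2f} ms over {iterations} iterations')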
Example #3
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

# ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

# ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation---------------
    if args.model:
        log.info(f'Reading the network: {args.model}')
        # .xml and .bin files
        net = ie.read_network(model=args.model)

# ---------------------------Step 3. Configure input & output----------------------------------------------------------
        log.info('Configuring input and output blobs')
        # Mark layers from args.output_layers as outputs
        if args.output_layers:
            net.add_outputs(get_output_layer_list(net, args, with_ports=True))

        # Get names of input and output blobs
        input_blobs = get_input_layer_list(net, args)
        output_blobs = get_output_layer_list(net, args, with_ports=False)

        # Set input and output precision manually
        for blob_name in input_blobs:
            net.input_info[blob_name].precision = 'FP32'

        for blob_name in output_blobs:
            net.outputs[blob_name].precision = 'FP32'

        net.batch_size = args.batch_size

# ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'

        # Set a GNA scale factor
        if args.import_gna_model:
            if args.scale_factor:
                log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
        else:
            if args.scale_factor:
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                scale_factors = []

                for file_name in re.split(', |,', args.input):
                    first_utterance = next(iter(read_utterance_file(file_name).values()))
                    scale_factors.append(get_scale_factor(first_utterance))

                log.info('Using scale factor(s) calculated from first utterance')
                set_scale_factors(plugin_config, scale_factors)

        if args.export_embedded_gna_model:
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

        if args.performance_counter:
            plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

    log.info('Loading the model to the plugin')
    if args.model:
        exec_net = ie.load_network(net, device_str, plugin_config)
    else:
        exec_net = ie.import_network(args.import_gna_model, device_str, plugin_config)
        input_blobs = get_input_layer_list(exec_net, args)
        output_blobs = get_output_layer_list(exec_net, args, with_ports=False)

    if args.input:
        input_files = re.split(', |,', args.input)

        if len(input_blobs) != len(input_files):
            log.error(f'Number of network inputs ({len(input_blobs)}) is not equal '
                      f'to number of ark files ({len(input_files)})')
            sys.exit(-3)

    if args.reference:
        reference_files = re.split(', |,', args.reference)

        if len(output_blobs) != len(reference_files):
            log.error('The number of reference files is not equal to the number of network outputs.')
            sys.exit(-5)

    if args.output:
        output_files = re.split(', |,', args.output)

        if len(output_blobs) != len(output_files):
            log.error('The number of output files is not equal to the number of network outputs.')
            sys.exit(-6)

    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        exec_net.export(args.export_gna_model)
        return 0

    if args.export_embedded_gna_model:
        log.info(f'Exported GNA embedded model to file {args.export_embedded_gna_model}')
        log.info(f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}')
        return 0

# ---------------------------Step 5. Create infer request--------------------------------------------------------------
# The load_network() method of the IECore class, called with a specified number of requests (default 1), returns an
# ExecutableNetwork instance that stores the infer requests, so the infer requests were already created in the previous step.

# ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    file_data = [read_utterance_file(file_name) for file_name in input_files]
    input_data = {
        utterance_name: {
            input_blobs[i]: file_data[i][utterance_name] for i in range(len(input_blobs))
        }
        for utterance_name in file_data[0].keys()
    }

    if args.reference:
        references = {output_blobs[i]: read_utterance_file(reference_files[i]) for i in range(len(output_blobs))}

# ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = {blob_name: {} for blob_name in output_blobs}
    total_infer_time = 0

    for i, key in enumerate(sorted(input_data)):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for request in exec_net.requests:
            for state in request.query_state():
                state.reset()

        result = infer_data(input_data[key], exec_net, input_blobs, output_blobs)

        for blob_name in result.keys():
            results[blob_name][key] = result[blob_name]

        infer_time = default_timer() - start_infer_time
        total_infer_time += infer_time
        num_of_frames = file_data[0][key].shape[0]
        avg_infer_time_per_frame = infer_time / num_of_frames

# ---------------------------Step 8. Process output--------------------------------------------------------------------
        log.info('')
        log.info(f'Utterance {i} ({key}):')
        log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
        log.info(f'Frames in utterance: {num_of_frames}')
        log.info(f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms')

        for blob_name in output_blobs:
            log.info('')
            log.info(f'Output blob name: {blob_name}')
            log.info(f'Number scores per frame: {results[blob_name][key].shape[1]}')

            if args.reference:
                log.info('')
                compare_with_reference(results[blob_name][key], references[blob_name][key])

        if args.performance_counter:
            if 'GNA' in args.device:
                pc = exec_net.requests[0].get_perf_counts()
                total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
                stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
                active_cycles = total_cycles - stall_cycles
                frequency = 10**6
                if args.arch == 'CORE':
                    frequency *= GNA_CORE_FREQUENCY
                else:
                    frequency *= GNA_ATOM_FREQUENCY
                total_inference_time = total_cycles / frequency
                active_time = active_cycles / frequency
                stall_time = stall_cycles / frequency
                log.info('')
                log.info('Performance Statistics of GNA Hardware')
                log.info(f'   Total Inference Time: {(total_inference_time * 1000):.4f} ms')
                log.info(f'   Active Time: {(active_time * 1000):.4f} ms')
                log.info(f'   Stall Time:  {(stall_time * 1000):.4f} ms')

    log.info('')
    log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')

    if args.output:
        for i, blob_name in enumerate(results):
            write_utterance_file(output_files[i], results[blob_name])
            log.info(f'File {output_files[i]} was created!')

# ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, '
             'for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
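
The infer_data() helper called above is not defined in this example. As a rough stand-in (not the sample's actual implementation, which also splits each utterance into batch_size-frame chunks), it could be sketched as:

import numpy as np

def infer_data(data, exec_net, input_blobs, output_blobs):
    # Run one synchronous inference over the whole utterance and copy out the requested blobs
    request = exec_net.requests[0]
    request.infer({blob_name: data[blob_name] for blob_name in input_blobs})
    return {blob_name: np.copy(request.output_blobs[blob_name].buffer)
            for blob_name in output_blobs}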
Example #4

print('select number of infer requests (0 for synchronous)')
infer_req = int(input())

ie = IECore()
net = IENetwork(model=model_xml, weights=model_bin)


if infer_req == 0:

    # Synchronous mode
    if os.path.isfile(exported_model):  # found an exported model to import
        print('found model to import')
        exec_net = ie.import_network(
            model_file=exported_model, device_name='MYRIAD',
            num_requests=1)
    else:
        print('creating exec model')
        exec_net = ie.load_network(
            network=net, num_requests=1, device_name='MYRIAD')
        exec_net.export(exported_model)

    input_blob = None
    feed_dict = {}
    for blob_name in net.inputs:
        if len(net.inputs[blob_name].shape) == 4:
            input_blob = blob_name
    output_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
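
The snippet stops before any inference is run; a hedged continuation of the synchronous branch, reusing the names defined above ('frame.jpg' is a placeholder image path), could look like:

import cv2

# Preprocess one frame to the NCHW input shape and run a blocking inference
frame = cv2.imread('frame.jpg')
image = cv2.resize(frame, (w, h)).transpose((2, 0, 1)).reshape((n, c, h, w))
feed_dict[input_blob] = image

res = exec_net.infer(inputs=feed_dict)
detections = res[output_blob]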
Example #5
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation---------------
    if args.model:
        log.info(f'Reading the network: {args.model}')
        # .xml and .bin files
        net = ie.read_network(model=args.model)

        # ---------------------------Step 3. Configure input & output----------------------------------------------------------
        log.info('Configuring input and output blobs')
        # Get names of input and output blobs
        if args.input_layers:
            input_blobs = re.split(', |,', args.input_layers)
        else:
            input_blobs = [next(iter(net.input_info))]

        if args.output_layers:
            output_name_port = [
                output.split(':')
                for output in re.split(', |,', args.output_layers)
            ]
            try:
                output_name_port = [(blob_name, int(port))
                                    for blob_name, port in output_name_port]
            except ValueError:
                log.error('Output parameter does not have a port.')
                sys.exit(-4)

            net.add_outputs(output_name_port)

            output_blobs = [blob_name for blob_name, port in output_name_port]
        else:
            output_blobs = [list(net.outputs.keys())[-1]]

        # Set input and output precision manually
        for blob_name in input_blobs:
            net.input_info[blob_name].precision = 'FP32'

        for blob_name in output_blobs:
            net.outputs[blob_name].precision = 'FP32'

        net.batch_size = args.batch_size

# ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'

        # Get a GNA scale factor
        if args.import_gna_model:
            log.info(
                f'Using scale factor from the imported GNA model: {args.import_gna_model}'
            )
        else:
            utterances = read_utterance_file(args.input.split(',')[0])
            key = sorted(utterances)[0]
            scale_factor = get_scale_factor(utterances[key])
            log.info(
                f'Using scale factor of {scale_factor:.7f} calculated from first utterance.'
            )

            plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)

        if args.export_embedded_gna_model:
            plugin_config[
                'GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config[
                'GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

    log.info('Loading the model to the plugin')
    if args.model:
        exec_net = ie.load_network(net, device_str, plugin_config)
    else:
        exec_net = ie.import_network(args.import_gna_model, device_str,
                                     plugin_config)
        input_blobs = [next(iter(exec_net.input_info))]
        output_blobs = [list(exec_net.outputs.keys())[-1]]

    if args.input:
        input_files = re.split(', |,', args.input)

        if len(input_blobs) != len(input_files):
            log.error(
                f'Number of network inputs ({len(input_blobs)}) is not equal '
                f'to number of ark files ({len(input_files)})')
            sys.exit(-3)

    if args.reference:
        reference_files = re.split(', |,', args.reference)

        if len(output_blobs) != len(reference_files):
            log.error(
                'The number of reference files is not equal to the number of network outputs.'
            )
            sys.exit(-5)

    if args.output:
        output_files = re.split(', |,', args.output)

        if len(output_blobs) != len(output_files):
            log.error(
                'The number of output files is not equal to the number of network outputs.'
            )
            sys.exit(-6)

    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        exec_net.export(args.export_gna_model)
        return 0

    if args.export_embedded_gna_model:
        log.info(
            f'Exported GNA embedded model to file {args.export_embedded_gna_model}'
        )
        log.info(
            f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}'
        )
        return 0

# ---------------------------Step 5. Create infer request--------------------------------------------------------------
# The load_network() method of the IECore class, called with a specified number of requests (default 1), returns an
# ExecutableNetwork instance that stores the infer requests, so the infer requests were already created in the previous step.

# ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    file_data = [read_utterance_file(file_name) for file_name in input_files]
    input_data = {
        utterance_name: {
            input_blobs[i]: file_data[i][utterance_name]
            for i in range(len(input_blobs))
        }
        for utterance_name in file_data[0].keys()
    }

    if args.reference:
        references = {
            output_blobs[i]: read_utterance_file(reference_files[i])
            for i in range(len(output_blobs))
        }

# ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = {blob_name: {} for blob_name in output_blobs}
    infer_times = []

    for key in sorted(input_data):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for request in exec_net.requests:
            for state in request.query_state():
                state.reset()

        result = infer_data(input_data[key], exec_net, input_blobs,
                            output_blobs)

        for blob_name in result.keys():
            results[blob_name][key] = result[blob_name]

        infer_times.append(default_timer() - start_infer_time)

# ---------------------------Step 8. Process output--------------------------------------------------------------------
    for blob_name in output_blobs:
        for i, key in enumerate(sorted(results[blob_name])):
            log.info(f'Utterance {i} ({key})')
            log.info(f'Output blob name: {blob_name}')
            log.info(
                f'Frames in utterance: {results[blob_name][key].shape[0]}')
            log.info(
                f'Total time in Infer (HW and SW): {infer_times[i] * 1000:.2f}ms'
            )

            if args.reference:
                compare_with_reference(results[blob_name][key],
                                       references[blob_name][key])

            log.info('')

    log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')

    if args.output:
        for i, blob_name in enumerate(results):
            write_utterance_file(output_files[i], results[blob_name])
            log.info(f'File {output_files[i]} was created!')


# ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, '
        'for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0