Example #1
0
    def benchmarkTrainingBlockGRUVsGRUCell(self):
        """Benchmarks training speed of GRUBlockCell vs. GRUCell.

        Sweeps a grid of batch/cell/input sizes and device placements,
        times both implementations, and reports one benchmark entry per
        cell type per configuration.
        """
        print("Comparison GRUBlockCell vs GRUCell")
        print("--------------------------------------------------------------")
        print("Training speed GRUBlockCell vs GRUCell")
        print(
            "batch_size, cell_size, input_size, time_steps, GPU, "
            "basic_time_training, block_time_training, performance_training[%]"
        )
        iters = 10

        # Full cartesian product of the benchmark configurations.
        grid = {
            "use_gpu": [True, False],
            "batch_size": [1, 32, 128],
            "cell_size": [128, 512],
            "input_size": [128, 512],
            "time_steps": [50]
        }
        for config in benchmarking.dict_product(grid):
            batch_size = config["batch_size"]
            cell_size = config["cell_size"]
            input_size = config["input_size"]
            time_steps = config["time_steps"]
            use_gpu = config["use_gpu"]

            basic_time, block_time = training_gru_block_vs_gru_cell(
                batch_size, cell_size, input_size, time_steps, use_gpu, iters)

            # Shared suffix keeps the two reported names consistent.
            name_suffix = "BS%i_CS%i_IS%i_TS%i_gpu_%s" % (
                batch_size, cell_size, input_size, time_steps, use_gpu)
            self.report_benchmark(
                name="GRUCell_training_time_" + name_suffix,
                iters=iters,
                wall_time=basic_time)
            self.report_benchmark(
                name="GRUBlockCell_training_time_" + name_suffix,
                iters=iters,
                wall_time=block_time)
Example #2
0
    def benchmarkSingleBpropStepBlockGRUVsGRUCell(self):
        """Benchmarks a single backprop step of GRUBlockCell vs. GRUCell.

        Sweeps a grid of batch/cell/input sizes and device placements and
        reports one benchmark entry per cell type per configuration.
        """
        print("--------------------------------------------------------------")
        print("Single bprop step speed GRUBlockCell vs GRUCell")
        print("batch_size, cell_size, input_size, GPU, basic_time, "
              "block_time, performance_inference[%]")
        iters = 10

        # Full cartesian product of the benchmark configurations.
        grid = {
            "use_gpu": [True, False],
            "batch_size": [1, 32, 128],
            "cell_size": [128, 512],
            "input_size": [128, 512]
        }
        for config in benchmarking.dict_product(grid):
            batch_size = config["batch_size"]
            cell_size = config["cell_size"]
            input_size = config["input_size"]
            use_gpu = config["use_gpu"]

            basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell(
                batch_size, cell_size, input_size, use_gpu, iters)

            # Shared suffix keeps the two reported names consistent.
            name_suffix = "BS%i_CS%i_IS%i_gpu_%s" % (
                batch_size, cell_size, input_size, use_gpu)
            self.report_benchmark(
                name="GRUCell_Bprop_single_step_time_" + name_suffix,
                iters=iters,
                wall_time=basic_time)
            self.report_benchmark(
                name="GRUBlockCell_Bprop_single_step_time_" + name_suffix,
                iters=iters,
                wall_time=block_time)

        print("--------------------------------------------------------------")
Example #3
0
    def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
        """Benchmarks LSTMBlockCell forward propagation via dynamic_rnn().

        Reports seconds per inference over a grid of batch sizes, cell
        sizes, time steps, device placements, and dtypes. The input feature
        dimension is set to the cell size (see the `inputs` shape below),
        so the input_size column always mirrors cell_size.
        """
        print("BlockLSTMCell forward propagation via dynamic_rnn().")
        print("--------------------------------------------------------------")
        print("LSTMBlockCell Seconds per inference.")
        # Fix: header now matches the per-row print below, which emits
        # dtype as its first column (previously the header had 6 columns
        # while each row had 7, producing a misaligned CSV).
        print("dtype,batch_size,cell_size,input_size,time_steps,use_gpu,"
              "wall_time")
        iters = 10
        for config in benchmarking.dict_product({
                "batch_size": [1, 8, 13, 32, 67, 128],
                "cell_size": [128, 250, 512, 650, 1024, 1350],
                "time_steps": [40],
                "use_gpu": [True, False],
                "dtype": ["float32", "float16"],
        }):
            dtype = (dtypes.float32
                     if config["dtype"] == "float32" else dtypes.float16)
            with ops.Graph().as_default():
                with benchmarking.device(use_gpu=config["use_gpu"]):
                    # Input feature dim == cell_size by construction.
                    inputs = variable_scope.get_variable(
                        "x",
                        dtype=dtype,
                        shape=[
                            config["time_steps"], config["batch_size"],
                            config["cell_size"]
                        ])
                    cell = lstm_ops.LSTMBlockCell(config["cell_size"],
                                                  dtype=dtype)
                    # NOTE(review): dynamic_rnn returns (outputs, state);
                    # both tensors are fetched together when timing below.
                    outputs = rnn.dynamic_rnn(cell,
                                              inputs,
                                              time_major=True,
                                              dtype=dtype)
                    init_op = variables.global_variables_initializer()

                with session.Session() as sess:
                    sess.run(init_op)
                    wall_time = benchmarking.seconds_per_run(
                        outputs, sess, iters)

                # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment
                # variable is set, this will produce a copy-paste-able CSV
                # file. cell_size appears twice on purpose: input_size equals
                # cell_size (see `inputs` shape above).
                print(",".join(
                    map(str, [
                        config["dtype"], config["batch_size"],
                        config["cell_size"], config["cell_size"],
                        config["time_steps"], config["use_gpu"], wall_time
                    ])))
                benchmark_name_template = "_".join([
                    "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i",
                    "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i",
                    "gpu_%(use_gpu)s"
                ])

                self.report_benchmark(name=benchmark_name_template % config,
                                      iters=iters,
                                      wall_time=wall_time,
                                      extras=config)