def benchmarkTrainingBlockGRUVsGRUCell(self):
    """Benchmarks a full training step of GRUBlockCell against GRUCell.

    Sweeps the cross product of device, batch size, cell size, input size
    and sequence length, and reports the wall time of each variant via
    report_benchmark().
    """
    print("Comparison GRUBlockCell vs GRUCell")
    print("--------------------------------------------------------------")
    print("Training speed GRUBlockCell vs GRUCell")
    print(
        "batch_size, cell_size, input_size, time_steps, GPU, "
        "basic_time_training, block_time_training, performance_training[%]"
    )
    iters = 10
    for config in benchmarking.dict_product({
        "use_gpu": [True, False],
        "batch_size": [1, 32, 128],
        "cell_size": [128, 512],
        "input_size": [128, 512],
        "time_steps": [50]
    }):
        # Bind the swept parameters once so each is looked up a single time.
        batch_size = config["batch_size"]
        cell_size = config["cell_size"]
        input_size = config["input_size"]
        time_steps = config["time_steps"]
        use_gpu = config["use_gpu"]
        basic_time, block_time = training_gru_block_vs_gru_cell(
            batch_size, cell_size, input_size, time_steps, use_gpu, iters)
        # Both reported names share the same configuration suffix.
        suffix = "BS%i_CS%i_IS%i_TS%i_gpu_%s" % (
            batch_size, cell_size, input_size, time_steps, use_gpu)
        self.report_benchmark(
            name="GRUCell_training_time_" + suffix,
            iters=iters,
            wall_time=basic_time)
        self.report_benchmark(
            name="GRUBlockCell_training_time_" + suffix,
            iters=iters,
            wall_time=block_time)
def benchmarkSingleBpropStepBlockGRUVsGRUCell(self):
    """Benchmarks one backprop step of GRUBlockCell against GRUCell.

    Sweeps device, batch size, cell size and input size, and reports the
    wall time of each variant via report_benchmark().
    """
    print("--------------------------------------------------------------")
    print("Single bprop step speed GRUBlockCell vs GRUCell")
    print("batch_size, cell_size, input_size, GPU, basic_time, "
          "block_time, performance_inference[%]")
    iters = 10
    for config in benchmarking.dict_product({
        "use_gpu": [True, False],
        "batch_size": [1, 32, 128],
        "cell_size": [128, 512],
        "input_size": [128, 512]
    }):
        # Bind the swept parameters once so each is looked up a single time.
        batch_size = config["batch_size"]
        cell_size = config["cell_size"]
        input_size = config["input_size"]
        use_gpu = config["use_gpu"]
        basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell(
            batch_size, cell_size, input_size, use_gpu, iters)
        # Both reported names share the same configuration suffix.
        suffix = "BS%i_CS%i_IS%i_gpu_%s" % (
            batch_size, cell_size, input_size, use_gpu)
        self.report_benchmark(
            name="GRUCell_Bprop_single_step_time_" + suffix,
            iters=iters,
            wall_time=basic_time)
        self.report_benchmark(
            name="GRUBlockCell_Bprop_single_step_time_" + suffix,
            iters=iters,
            wall_time=block_time)
    print("--------------------------------------------------------------")
def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
    """Benchmarks LSTMBlockCell forward propagation via dynamic_rnn().

    Sweeps batch size, cell size, sequence length, device and dtype, prints
    one CSV row per configuration, and reports each timing through
    report_benchmark().
    """
    print("BlockLSTMCell forward propagation via dynamic_rnn().")
    print("--------------------------------------------------------------")
    print("LSTMBlockCell Seconds per inference.")
    # BUGFIX: the header previously omitted the leading "dtype" column, so
    # the copy-pasteable CSV (see comment below) had misaligned columns —
    # every data row printed in the loop starts with config["dtype"].
    print("dtype,batch_size,cell_size,input_size,time_steps,use_gpu,wall_time")
    iters = 10
    for config in benchmarking.dict_product({
        "batch_size": [1, 8, 13, 32, 67, 128],
        "cell_size": [128, 250, 512, 650, 1024, 1350],
        "time_steps": [40],
        "use_gpu": [True, False],
        "dtype": ["float32", "float16"],
    }):
        dtype = dtypes.float32 if config[
            "dtype"] == "float32" else dtypes.float16
        with ops.Graph().as_default():
            with benchmarking.device(use_gpu=config["use_gpu"]):
                # The input feature dimension equals cell_size, so
                # input_size == cell_size throughout this benchmark.
                inputs = variable_scope.get_variable(
                    "x",
                    dtype=dtype,
                    shape=[
                        config["time_steps"], config["batch_size"],
                        config["cell_size"]
                    ])
                cell = lstm_ops.LSTMBlockCell(config["cell_size"], dtype=dtype)
                outputs = rnn.dynamic_rnn(
                    cell, inputs, time_major=True, dtype=dtype)
                init_op = variables.global_variables_initializer()
                with session.Session() as sess:
                    sess.run(init_op)
                    wall_time = benchmarking.seconds_per_run(
                        outputs, sess, iters)
        # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
        # is set, this will produce a copy-paste-able CSV file.
        print(",".join(
            map(str, [
                config["dtype"], config["batch_size"], config["cell_size"],
                config["cell_size"], config["time_steps"], config["use_gpu"],
                wall_time
            ])))
        benchmark_name_template = "_".join([
            "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i",
            "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i",
            "gpu_%(use_gpu)s"
        ])
        self.report_benchmark(
            name=benchmark_name_template % config,
            iters=iters,
            wall_time=wall_time,
            extras=config)