def main(args):
    """Build one TASO graph holding every architecture listed in the input
    file, then run the TASO optimizer over the combined graph.

    Reads from ``args``: input_size ('NxCxHxW' string), channels,
    input_file, num_models (optional cap), save_models (dump ONNX files).
    """
    graph = ts.new_graph()
    dims = tuple(int(d) for d in args.input_size.split('x'))
    net_input = graph.new_input(dims=dims)
    weights = create_layer_weights(graph, NUM_LAYERS, args.channels)
    arcs = parse_arcs(args.input_file)
    if args.num_models is not None:
        arcs = arcs[:args.num_models]
    # 1x1 stem convolution projecting the raw input to args.channels
    # channels; every architecture below branches off this tensor.
    stem_weight = graph.new_weight(dims=(args.channels, net_input.dim(1), 1, 1))
    stem = graph.conv2d(input=net_input, weight=stem_weight,
                        strides=(1, 1), padding="SAME", activation="RELU")
    for arc in arcs:
        create_architecture(arc, graph, stem, weights)
    if args.save_models:
        onnx.save(ts.export_onnx(graph), 'original_model.onnx')
    optimized = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_models:
        onnx.save(ts.export_onnx(optimized), 'optimized_model.onnx')
def main(args):
    """Build the shared-ResNet benchmark graph and optimize it with TASO.

    Reads from ``args``: input_size ('NxCxHxW' string), num_models,
    num_shared_blocks, save_graphs (dump before/after ONNX files).
    """
    graph = ts.new_graph()
    dims = tuple(int(d) for d in args.input_size.split('x'))
    net_input = graph.new_input(dims=dims)
    # Construct num_models ResNets that share their first blocks.
    shared_resnet_model(graph, net_input, args.num_models, args.num_shared_blocks)
    if args.save_graphs:
        original_model = ts.export_onnx(graph)
        onnx.save(original_model, 'original_model.onnx')
    optimized = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_graphs:
        optimized_model = ts.export_onnx(optimized)
        onnx.save(optimized_model, 'optimized_model.onnx')
# Build the four ResNeXt-50 stages. `t` and `graph` are defined earlier in
# the script (outside this chunk). Each stage is (block count, output
# channels, stride of its FIRST block); every later block in a stage uses
# stride (1, 1). The first stage never downsamples.
for num_blocks, out_channels, first_strides in (
        (3, 128, (1, 1)),
        (4, 256, (2, 2)),
        (6, 512, (2, 2)),
        (3, 1024, (2, 2))):
    strides = first_strides
    for i in range(num_blocks):
        t = resnext_block(graph, t, strides, out_channels, 32)
        strides = (1, 1)

unoptimized_model = ts.export_onnx(graph)

# Optional debug dump of the unoptimized graph.
debug_dir = None
if args.debug_dir is not None:
    debug_dir = args.debug_dir.resolve()
    # BUG FIX: was mkdir(parents=True) with no exist_ok — re-running the
    # script crashed with FileExistsError once the directory existed.
    debug_dir.mkdir(parents=True, exist_ok=True)
    graph.export_to_file(str(debug_dir / "unoptimized.txt").encode())
if args.export:
    onnx.checker.check_model(unoptimized_model)
    # NOTE(review): `batch_size` is defined elsewhere in the file — confirm
    # it is in scope before this chunk runs.
    onnx.save(
        unoptimized_model,
        str(args.output_dir / f"resnext50_{batch_size}_unoptimized.onnx"))

# Leading underscore: the optimized graph itself is unused here; TASO's
# search is run for its side effects / timing output.
_optimized_model = ts.optimize(graph, alpha=args.alpha, budget=args.budget,
                               print_subst=args.print_subst)
#taso_tensor_input = new_graph.new_input_with_value(dims=(1, 3, 299, 299)) new_graph.build_graph() # warm up for _, data in enumerate(test_input): new_graph.taso_forward(data) # real run time_sum = 0 for _, data in enumerate(test_input): start = time.time() new_graph.taso_forward(data) time_sum += (time.time() - start) print("cuDNN runtime inference time after taso optimization: {}sec".format(time_sum / len(test_input))) f.write("cuDNN runtime inference time after taso optimization: {}sec\n\n".format(time_sum / len(test_input))) print("taso.export_onnx()") new_model = taso.export_onnx(new_graph) onnx.save(new_model, "./onnx_models/inception_v3.onnx") print("onnx.load()") taso_model = onnx.load("./onnx_models/inception_v3.onnx") print("TASO model graph:\n{}".format(onnx.helper.printable_graph(taso_model.graph))) print("##### INFERENCE with onnxruntime (after TASO) #####") sess = rt.InferenceSession("./onnx_models/inception_v3.onnx") input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name # warm up for _, data in enumerate(test_input): sess.run([label_name], {input_name: data}) # real run time_sum = 0 for _, data in enumerate(test_input): start = time.time()
# transpose the output back output = graph.transpose(output,perm=(1,0,2), shuffle=True) output = graph.reshape(output, shape=(64, 1024)) # a final linear layer linear = graph.new_weight(dims=(d_model, d_model)) output = graph.matmul(input, linear) return output if __name__ == '__main__': test_input = list() for i in range(500): test_input.append(np.random.randn(seq_length, hidden_dims)) graph = ts.new_graph() input = graph.new_input(dims=(seq_length, hidden_dims)) input = graph.relu(input) t = input for i in range(12): t = attention(graph, t, 16) print(t) before_model = ts.export_onnx(graph) onnx.save(before_model, "./onnx_models/bert.onnx") new_graph = ts.optimize(graph, alpha=1.05, budget=100) after_model = ts.export_onnx(new_graph) onnx.save(after_model, "./onnx_models/bert_taso.onnx")
# NOTE(review): the lines below are the TAIL of a cell-construction function
# (the branch structure matches a NASNet reduction cell) whose `def` header
# lies outside this chunk; they are indented one level on that assumption —
# confirm against the full file. The ts[6..9] indices assume six earlier
# ts.append() calls made before this chunk begins.
    ts.append(graph.maxpool2d(input=cur, kernels=(3,3), strides=(2,2), padding="SAME"))
    ts.append(seperable_conv(graph, input=outputs[0], out_channels=out_channels, kernels=(3,3), strides=(1,1), padding="SAME"))
    # branch: maxpool + separable conv
    outputs.append(graph.add(ts[6], ts[7]))
    ts.append(graph.avgpool2d(input=outputs[0], kernels=(3,3), strides=(1,1), padding="SAME"))
    ts.append(outputs[1])
    # branch: avgpool + previous output
    outputs.append(graph.add(ts[8], ts[9]))
    # concatenate all branch outputs along the channel axis (dim 1)
    return graph.concat(1, outputs)

# --- NASNet-A style script: stem conv, then 3 stages of 5 normal cells
# --- each followed by a channel-doubling reduction cell.
graph = ts.new_graph()
input = graph.new_input(dims=(1,3,224,224))
weight = graph.new_weight(dims=(64,3,7,7))
# 7x7/2 stem convolution + 3x3/2 max pool
input = graph.conv2d(input=input, weight=weight, strides=(2,2), padding="SAME", activation="RELU")
input = graph.maxpool2d(input=input, kernels=(3,3), strides=(2,2), padding="SAME")
out_channels = 128
for i in range(3):
    prev = input
    cur = input
    for j in range(5):
        # normal cells take the two most recent cell outputs
        t = normal_cell(graph, prev, cur, out_channels)
        prev = cur
        cur = t
    out_channels *= 2
    input = reduction_cell(graph, prev, cur, out_channels)
# budget=-1: run the TASO search until it converges (no iteration cap)
new_graph = ts.optimize(graph, alpha=1.0, budget=-1)
onnx_model = ts.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "nasneta_taso.onnx")
# Build the four ResNeXt-50 stages; `t`, `graph`, and `test_input` come from
# earlier in the script. Each stage is (block count, output channels, stride
# of its first block); subsequent blocks in a stage use stride (1, 1).
stage_plan = (
    (3, 128, (1, 1)),
    (4, 256, (2, 2)),
    (6, 512, (2, 2)),
    (3, 1024, (2, 2)),
)
for num_blocks, out_channels, strides in stage_plan:
    for i in range(num_blocks):
        t = resnext_block(graph, t, strides, out_channels, 32)
        strides = (1, 1)

# Export the unoptimized model and time it under onnxruntime.
before_model = ts.export_onnx(graph)
onnx.save(before_model, "./onnx_models/resnext50.onnx")

print("##### INFERENCE (before TASO) #####")
sess1 = rt.InferenceSession("./onnx_models/resnext50.onnx")
input_name = sess1.get_inputs()[0].name
label_name = sess1.get_outputs()[0].name

time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    output1 = sess1.run([label_name], {input_name: data})
    time_sum += (time.time() - start)
# mean per-sample latency before optimization
print("inference time before taso: {}s".format(time_sum / len(test_input)))
# Build a ResNet-50-style graph and compare TASO's measured runtime for the
# original and optimized versions. Stage table: (block count, channels,
# stride of the stage's first block); remaining blocks use stride (1, 1).
graph = ts.new_graph()
input = graph.new_input(dims=(1, 64, 56, 56))
t = input
for num_blocks, channels, first_strides in (
        (3, 64, (1, 1)),
        (4, 128, (2, 2)),
        (6, 256, (2, 2)),
        (3, 512, (2, 2))):
    strides = first_strides
    for i in range(num_blocks):
        t = resnet_block(graph, t, strides, channels)
        strides = (1, 1)

onnx_model = ts.export_onnx(graph)
onnx.save(onnx_model, "resnet50_old.onnx")
old_time = graph.run_time()
#onnx.checker.check_model(onnx_model)

new_graph = ts.optimize(graph, alpha=1.0, budget=1000)
new_time = new_graph.run_time()
print("Run time of original graph is: {}".format(old_time))
print("Run time of optimized graph is: {}".format(new_time))