def main(args):
    """Build one TASO graph containing every parsed architecture, optimize it,
    and optionally save ONNX snapshots before/after optimization.

    :param args: parsed CLI namespace; uses input_size, channels, input_file,
                 num_models, save_models.
    """
    graph = ts.new_graph()
    # args.input_size is an 'x'-separated dim string, e.g. "1x3x224x224".
    input_size = tuple([int(x) for x in args.input_size.split('x')])
    input = graph.new_input(dims=input_size)
    # Project helper: pre-creates the weights shared by all architectures.
    all_w = create_layer_weights(graph, NUM_LAYERS, args.channels)
    all_arcs = parse_arcs(args.input_file)
    if args.num_models is not None:
        # Cap the number of architectures added to the graph.
        all_arcs = all_arcs[:args.num_models]
    # stem conv: 1x1 conv mapping the input channels to args.channels
    t = graph.conv2d(input=input,
                     weight=graph.new_weight(dims=(args.channels, input.dim(1), 1, 1)),
                     strides=(1, 1), padding="SAME", activation="RELU")
    # Each architecture is appended to the same graph, branching off the stem.
    for arc in all_arcs:
        create_architecture(arc, graph, t, all_w)
    if args.save_models:
        onnx_model = ts.export_onnx(graph)
        onnx.save(onnx_model, 'original_model.onnx')
    new_graph = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_models:
        onnx_model = ts.export_onnx(new_graph)
        onnx.save(onnx_model, 'optimized_model.onnx')
def main(args):
    """Build a shared-block ResNet graph, optimize it with TASO, and
    optionally save ONNX snapshots before/after optimization.

    :param args: parsed CLI namespace; uses input_size, num_models,
                 num_shared_blocks, save_graphs.
    """
    graph = ts.new_graph()
    # args.input_size is an 'x'-separated dim string, e.g. "1x3x224x224".
    input_size = tuple([int(x) for x in args.input_size.split('x')])
    input = graph.new_input(dims=input_size)
    # Project helper: adds num_models ResNets that share num_shared_blocks.
    shared_resnet_model(graph, input, args.num_models, args.num_shared_blocks)
    if args.save_graphs:
        original_model = ts.export_onnx(graph)
        onnx.save(original_model, 'original_model.onnx')
    new_graph = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_graphs:
        optimized_model = ts.export_onnx(new_graph)
        onnx.save(optimized_model, 'optimized_model.onnx')
def graph_latency(graph: Graph, batchsize, warmup, number, repeat, optimize, alpha=1.0, budget=1000):
    """Measure the latency of a computation graph inside the TASO framework.

    :param graph: ios.ir.Graph
        The computation graph whose latency is measured.
    :param batchsize: int
        The execution batch size.
    :param warmup: int
        Unused; TASO performs its own warmup internally.
    :param number: int
        Unused; TASO performs its own repetition internally.
    :param repeat: int
        How many latency measurements to take.
    :param optimize: boolean
        When True, optimize the graph with TASO before measuring; when
        False, measure the converted graph directly.
    :param alpha: The relaxation coefficient passed to taso.optimize.
    :param budget: The iteration budget passed to taso.optimize.
    :return: List[float]
        The latency measurement results.
    """
    taso_graph = graph_ios2taso(graph, batchsize)
    if optimize:
        taso_graph = taso.optimize(taso_graph, alpha=alpha, budget=budget)
    # warmup/number are handled inside taso's own run_time() implementation
    latencies = []
    for _ in range(repeat):
        latencies.append(taso_graph.run_time())
    return latencies
strides = (1, 1)

# Snapshot the graph before optimization so the two variants can be compared.
unoptimized_model = ts.export_onnx(graph)

debug_dir = None
if args.debug_dir is not None:
    debug_dir = args.debug_dir.resolve()
    # FIX: exist_ok=True so re-running the script does not crash with
    # FileExistsError when the debug directory is left over from a prior run.
    debug_dir.mkdir(parents=True, exist_ok=True)
if debug_dir is not None:
    # export_to_file expects a bytes path.
    graph.export_to_file(str(debug_dir / "unoptimized.txt").encode())
if args.export:
    # Validate before saving so a malformed export fails loudly here.
    onnx.checker.check_model(unoptimized_model)
    onnx.save(
        unoptimized_model,
        str(args.output_dir / f"resnext50_{batch_size}_unoptimized.onnx"))

_optimized_model = ts.optimize(graph, alpha=args.alpha, budget=args.budget,
                               print_subst=args.print_subst)
if debug_dir is not None:
    _optimized_model.export_to_file(
        str(debug_dir / "optimized.txt").encode())
if args.export:
    optimized_model = ts.export_onnx(_optimized_model)
    onnx.save(
        optimized_model,
        str(args.output_dir / f"resnext50_{batch_size}_optimized.onnx"))
def _average_latency(g, inputs):
    """Warm up *g* on every input, then return the mean wall-clock seconds
    per taso_forward call over a second pass of the same inputs."""
    # warm up
    for data in inputs:
        g.taso_forward(data)
    # real run
    time_sum = 0
    for data in inputs:
        start = time.time()
        g.taso_forward(data)
        time_sum += (time.time() - start)
    return time_sum / len(inputs)


old_graph.build_graph()
# Deduplicated: the same warmup+measure sequence previously appeared twice,
# once for the original graph and once for the optimized graph.
avg = _average_latency(old_graph, test_input)
print("cuDNN runtime inference time before taso: {}sec".format(avg))
f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(avg))

print("taso.optimize()")
new_graph = taso.optimize(old_graph, alpha=1.05, budget=100)

new_graph.build_graph()
avg = _average_latency(new_graph, test_input)
print("cuDNN runtime inference time after taso optimization: {}sec".format(avg))
f.write("cuDNN runtime inference time after taso optimization: {}sec\n\n".format(avg))
"""Optimize an ONNX model with TASO and save the result next to the input.

Usage: python <script> -f model.onnx  ->  writes "model.onnx.taso.onnx".
"""
import taso
import onnx
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path to input ONNX file", required=True)
args = parser.parse_args()

graph = taso.load_onnx(args.file)
# alpha: cost relaxation coefficient; budget: number of search iterations;
# print_subst=True logs the substitutions TASO applies.
new_graph = taso.optimize(graph, alpha = 1.0, budget = 100, print_subst = True)
onnx_model = taso.export_onnx(new_graph)
# Validate before saving so a malformed export fails loudly here.
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "{}.taso.onnx".format(args.file))
# NOTE(review): this chunk starts inside a NAS-cell builder function (its
# `def` line is outside this view); the indented statements below are the
# tail of that function's body.
    t.append(combine(graph, x, input))
    # Combine the 8 candidate tensors t[0..7] pairwise through the fixed
    # NAS-cell wiring below, ending in a single tanh output.
    midt = list()
    midt.append(graph.add(graph.relu(t[0]), graph.sigmoid(t[3])))
    midt.append(graph.add(graph.sigmoid(t[1]), graph.tanh(t[2])))
    midt.append(graph.mul(graph.sigmoid(t[4]), graph.tanh(t[5])))
    midt.append(graph.mul(graph.sigmoid(t[6]), graph.relu(t[7])))
    midt.append(graph.add(graph.sigmoid(midt[1]), graph.tanh(midt[2])))
    midt.append(graph.mul(graph.tanh(midt[0]), graph.tanh(midt[3])))
    midt.append(graph.mul(graph.tanh(midt[4]), graph.tanh(midt[5])))
    return graph.tanh(midt[6])


# --- script: unroll the recurrent NAS cell over `length` steps, then
# --- compare TASO run time before and after optimization.
graph = taso.new_graph()
xs = list()
for i in range(length):
    xs.append(graph.new_input(dims=(1, hidden_size)))
state = graph.new_weight(dims=(1, hidden_size))
for i in range(length):
    # Feed the previous state and the step input through one NAS cell.
    state = nas_node(graph, state, xs[i])
old_time = graph.run_time()
new_graph = taso.optimize(graph, alpha=1.0, budget=100)
new_time = new_graph.run_time()
print("Run time of original graph is: {}".format(old_time))
print("Run time of optimized graph is: {}".format(new_time))
args = get_args()
# Append both timings to the result file as a tab-separated line.
with open(args.result_file, "a") as f:
    f.write("{}\t{}\n".format(old_time, new_time))
# transpose the output back output = graph.transpose(output,perm=(1,0,2), shuffle=True) output = graph.reshape(output, shape=(64, 1024)) # a final linear layer linear = graph.new_weight(dims=(d_model, d_model)) output = graph.matmul(input, linear) return output if __name__ == '__main__': test_input = list() for i in range(500): test_input.append(np.random.randn(seq_length, hidden_dims)) graph = ts.new_graph() input = graph.new_input(dims=(seq_length, hidden_dims)) input = graph.relu(input) t = input for i in range(12): t = attention(graph, t, 16) print(t) before_model = ts.export_onnx(graph) onnx.save(before_model, "./onnx_models/bert.onnx") new_graph = ts.optimize(graph, alpha=1.05, budget=100) after_model = ts.export_onnx(new_graph) onnx.save(after_model, "./onnx_models/bert_taso.onnx")
# NOTE(review): this chunk starts inside a NASNet cell-builder function (its
# `def` line is outside this view); the indented statements below are its tail.
    # Remaining branch pairs of the cell; each pair is summed into an output.
    ts.append(graph.maxpool2d(input=cur, kernels=(3,3), strides=(2,2), padding="SAME"))
    ts.append(seperable_conv(graph, input=outputs[0], out_channels=out_channels, kernels=(3,3), strides=(1,1), padding="SAME"))
    outputs.append(graph.add(ts[6], ts[7]))
    ts.append(graph.avgpool2d(input=outputs[0], kernels=(3,3), strides=(1,1), padding="SAME"))
    ts.append(outputs[1])
    outputs.append(graph.add(ts[8], ts[9]))
    # Concatenate all cell outputs along the channel dimension (axis 1).
    return graph.concat(1, outputs)


# --- script: build a NASNet-A-style network and optimize it with TASO ---
graph = ts.new_graph()
input = graph.new_input(dims=(1,3,224,224))
weight = graph.new_weight(dims=(64,3,7,7))
# Stem: strided 7x7 conv followed by a strided 3x3 max-pool.
input = graph.conv2d(input=input, weight=weight, strides=(2,2), padding="SAME", activation="RELU")
input = graph.maxpool2d(input=input, kernels=(3,3), strides=(2,2), padding="SAME")
out_channels = 128
# 3 stages of 5 normal cells each, with a reduction cell (doubling the
# channel count) between stages.
for i in range(3):
    prev = input
    cur = input
    for j in range(5):
        t = normal_cell(graph, prev, cur, out_channels)
        prev = cur
        cur = t
    out_channels *= 2
    input = reduction_cell(graph, prev, cur, out_channels)
# budget=-1: presumably an unbounded search budget — TODO confirm in TASO docs.
new_graph = ts.optimize(graph, alpha=1.0, budget=-1)
onnx_model = ts.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "nasneta_taso.onnx")
import taso
import onnx

#Build DNN model: a residual-style block — two conv paths summed then ReLU'd.
graph = taso.new_graph()
input = graph.new_input(dims=(1,128,56,56))
w1 = graph.new_weight(dims=(128,128,3,3))
w2 = graph.new_weight(dims=(128,128,1,1))
w3 = graph.new_weight(dims=(128,128,3,3))
# Left path: two stacked 3x3 convs.
left = graph.conv2d(input=input, weight=w1, strides=(1,1), padding="SAME", activation="RELU")
# BUG FIX: the second conv must consume the first conv's output (`left`);
# the original fed `input` again, leaving the w1 conv dead code.
left = graph.conv2d(input=left, weight=w3, strides=(1,1), padding="SAME")
# Right path: a single 1x1 conv.
right = graph.conv2d(input=input, weight=w2, strides=(1,1), padding="SAME", activation="RELU")
output = graph.add(left, right)
output = graph.relu(output)
#Optimize DNN model
new_graph = taso.optimize(graph)
onnx_model = taso.export_onnx(new_graph)
onnx.save(onnx_model, "arbitrary_DNN.onnx")
def inception_logits(graph, v):
    # Network head: global average pooling via the project pool2d helper.
    return pool2d(graph, v, pool_type='global_avg')


def inception_v3(batch_size=1):
    """Build an Inception-v3-style TASO graph for the given batch size.

    :param batch_size: int, leading dimension of the 3x299x299 input.
    :return: the constructed taso graph.
    """
    graph = taso.new_graph()
    v = graph.new_input(dims=(batch_size, 3, 299, 299))
    # Stem, then the standard inception stage sequence (project helpers).
    v = inception_front(graph, v)
    v = inception_a(graph, v, 32)
    v = inception_a(graph, v, 64)
    v = inception_a(graph, v, 64)
    v = inception_b(graph, v)
    v = inception_c(graph, v, 128)
    v = inception_c(graph, v, 160)
    v = inception_c(graph, v, 160)
    v = inception_c(graph, v, 192)
    v = inception_d(graph, v)
    v = inception_e(graph, v)
    v = inception_e(graph, v)
    v = inception_logits(graph, v)
    return graph


graph = inception_v3( batch_size=32)  # change batch_size from 4 to 8 would cause error.
opt_graph = taso.optimize(graph, alpha=1.0, budget=30)
# Report measured run time before and after optimization.
print(graph.run_time())
print(opt_graph.run_time())