Beispiel #1
0
def main(args):
    """Build every parsed architecture into one TASO graph, optionally dump
    it to ONNX, then run the TASO optimizer (and optionally dump that too).
    """
    graph = ts.new_graph()
    dims = tuple(int(part) for part in args.input_size.split('x'))
    graph_input = graph.new_input(dims=dims)

    weights = create_layer_weights(graph, NUM_LAYERS, args.channels)
    arcs = parse_arcs(args.input_file)
    if args.num_models is not None:
        arcs = arcs[:args.num_models]

    # 1x1 stem convolution mapping the raw input channels to args.channels.
    stem_weight = graph.new_weight(dims=(args.channels, graph_input.dim(1),
                                         1, 1))
    stem = graph.conv2d(input=graph_input,
                        weight=stem_weight,
                        strides=(1, 1),
                        padding="SAME",
                        activation="RELU")

    for arc in arcs:
        create_architecture(arc, graph, stem, weights)

    if args.save_models:
        onnx.save(ts.export_onnx(graph), 'original_model.onnx')

    optimized = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_models:
        onnx.save(ts.export_onnx(optimized), 'optimized_model.onnx')
Beispiel #2
0
def main(args):
    """Construct the shared-ResNet benchmark graph, optimize it with TASO,
    and optionally save both the original and optimized graphs as ONNX.
    """
    graph = ts.new_graph()
    dims = tuple(int(part) for part in args.input_size.split('x'))
    graph_input = graph.new_input(dims=dims)
    shared_resnet_model(graph, graph_input, args.num_models,
                        args.num_shared_blocks)

    if args.save_graphs:
        onnx.save(ts.export_onnx(graph), 'original_model.onnx')

    optimized = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_graphs:
        onnx.save(ts.export_onnx(optimized), 'optimized_model.onnx')
def graph_latency(graph: Graph,
                  batchsize,
                  warmup,
                  number,
                  repeat,
                  optimize,
                  alpha=1.0,
                  budget=1000):
    """
    Measure the latency of a computation graph when executed by TASO.

    :param graph: ios.ir.Graph
        The computation graph whose latency is measured.

    :param batchsize: int
        The execution batch size.

    :param warmup: int
        Not used (TASO handles warmup internally).

    :param number: int
        Not used (TASO handles repetition internally).

    :param repeat: int
        How many latency samples to collect.

    :param optimize: boolean
        If True, run the TASO optimizer on the graph before timing;
        if False, time the converted graph as-is.

    :param alpha:
        The relaxation coefficient passed to the optimizer.

    :param budget:
        The iteration budget passed to the optimizer.

    :return: List[float]
        The collected latency samples.
    """
    taso_graph = graph_ios2taso(graph, batchsize)
    if optimize:
        taso_graph = taso.optimize(taso_graph, alpha=alpha, budget=budget)
    # warmup/number are already handled inside TASO's run_time().
    samples = []
    for _ in range(repeat):
        samples.append(taso_graph.run_time())
    return samples
Beispiel #4
0
        strides = (1, 1)

    # NOTE(review): this span is the tail of a definition that starts before
    # this view; `graph`, `args`, and `batch_size` are bound above it.

    # Export the unoptimized graph so it can be checked/saved below.
    unoptimized_model = ts.export_onnx(graph)
    debug_dir = None
    if args.debug_dir is not None:
        debug_dir = args.debug_dir.resolve()
        # NOTE(review): mkdir(parents=True) raises FileExistsError when the
        # directory already exists — confirm exist_ok=False is intended.
        debug_dir.mkdir(parents=True)
    if debug_dir is not None:
        # export_to_file presumably takes a bytes path, hence the .encode().
        graph.export_to_file(str(debug_dir / "unoptimized.txt").encode())
    if args.export:
        onnx.checker.check_model(unoptimized_model)
        onnx.save(
            unoptimized_model,
            str(args.output_dir / f"resnext50_{batch_size}_unoptimized.onnx"))
    # Run the TASO optimizer with user-supplied search parameters.
    _optimized_model = ts.optimize(graph,
                                   alpha=args.alpha,
                                   budget=args.budget,
                                   print_subst=args.print_subst)
    if debug_dir is not None:
        _optimized_model.export_to_file(
            str(debug_dir / "optimized.txt").encode())
    if args.export:
        # Export and save the optimized graph alongside the unoptimized one.
        optimized_model = ts.export_onnx(_optimized_model)
        onnx.save(
            optimized_model,
            str(args.output_dir / f"resnext50_{batch_size}_optimized.onnx"))
    # new_graph = ts.optimize(graph, alpha=args.alpha, budget=args.budget)
    # onnx_model = ts.export_onnx(new_graph)
    # onnx.checker.check_model(onnx_model)
    # onnx.save(onnx_model, "resnext50_xflow.onnx")
Beispiel #5
0
    # NOTE(review): fragment — the enclosing definition and the origins of
    # `old_graph`, `test_input`, and the open file `f` are above this view.
    old_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        old_graph.taso_forward(data)
    # real run: accumulate wall-clock time over every test input
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        old_graph.taso_forward(data)
        time_sum += (time.time() - start)
    # Report the mean per-input latency before optimization.
    print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
    f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))


    print("taso.optimize()")
    # Optimize the graph, then repeat the identical warmup + timed
    # measurement on the optimized graph for an apples-to-apples comparison.
    new_graph = taso.optimize(old_graph, alpha=1.05, budget=100)
    #print("[after opt] taso runtime performance: {}ms".format(new_graph.run_time()))
    #taso_tensor_input = new_graph.new_input_with_value(dims=(1, 3, 299, 299))
    new_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        new_graph.taso_forward(data)
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        new_graph.taso_forward(data)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time after taso optimization: {}sec".format(time_sum / len(test_input)))
    f.write("cuDNN runtime inference time after taso optimization: {}sec\n\n".format(time_sum / len(test_input)))
Beispiel #6
0
import taso
import onnx
import argparse

# Optimize a user-supplied ONNX model with TASO and write the optimized
# model next to the input (suffix ".taso.onnx").
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path to input ONNX file", required=True)
args = parser.parse_args()

graph = taso.load_onnx(args.file)
optimized = taso.optimize(graph, alpha=1.0, budget=100, print_subst=True)
exported = taso.export_onnx(optimized)
onnx.checker.check_model(exported)
onnx.save(exported, "{}.taso.onnx".format(args.file))
Beispiel #7
0
        t.append(combine(graph, x, input))
    # NOTE(review): fragment — the definition header and the earlier entries
    # of `t` are above this view; the indexing below presumably assumes
    # t holds at least 8 tensors (t[0]..t[7]) — confirm in the full function.
    midt = list()
    # Pairwise-merge the candidates with mixed activations, reducing the
    # eight inputs to a single tensor (NAS RNN cell wiring, per the caller).
    midt.append(graph.add(graph.relu(t[0]), graph.sigmoid(t[3])))
    midt.append(graph.add(graph.sigmoid(t[1]), graph.tanh(t[2])))
    midt.append(graph.mul(graph.sigmoid(t[4]), graph.tanh(t[5])))
    midt.append(graph.mul(graph.sigmoid(t[6]), graph.relu(t[7])))
    midt.append(graph.add(graph.sigmoid(midt[1]), graph.tanh(midt[2])))
    midt.append(graph.mul(graph.tanh(midt[0]), graph.tanh(midt[3])))
    midt.append(graph.mul(graph.tanh(midt[4]), graph.tanh(midt[5])))
    return graph.tanh(midt[6])

# Build an unrolled NAS RNN: one input tensor per timestep, threaded
# through nas_node with a learned initial state.
graph = taso.new_graph()
xs = [graph.new_input(dims=(1, hidden_size)) for _ in range(length)]
state = graph.new_weight(dims=(1, hidden_size))
for x in xs:
    state = nas_node(graph, state, x)

# Time the graph before and after TASO optimization.
old_time = graph.run_time()
new_graph = taso.optimize(graph, alpha=1.0, budget=100)
new_time = new_graph.run_time()

print("Run time of original graph is: {}".format(old_time))
print("Run time of optimized graph is: {}".format(new_time))

# Append the (before, after) timing pair to the result file.
args = get_args()
with open(args.result_file, "a") as f:
    f.write("{}\t{}\n".format(old_time, new_time))
Beispiel #8
0
    # transpose the output back
    output = graph.transpose(output,perm=(1,0,2), shuffle=True)
    output = graph.reshape(output, shape=(64, 1024))

    # a final linear layer
    linear = graph.new_weight(dims=(d_model, d_model))
    # NOTE(review): the matmul consumes `input`, not the transposed/reshaped
    # `output` computed just above, which leaves that result unused. This
    # mirrors the upstream TASO bert example, but it looks like the attention
    # output is being discarded — confirm whether `output` was intended here.
    output = graph.matmul(input, linear)
    return output

if __name__ == '__main__':
    # 500 random activations matching the model's input shape.
    test_input = [np.random.randn(seq_length, hidden_dims) for _ in range(500)]

    graph = ts.new_graph()
    input = graph.new_input(dims=(seq_length, hidden_dims))
    input = graph.relu(input)

    # Stack 12 attention blocks with 16 heads each.
    t = input
    for _ in range(12):
        t = attention(graph, t, 16)
    print(t)

    # Export the model before and after TASO optimization.
    before_model = ts.export_onnx(graph)
    onnx.save(before_model, "./onnx_models/bert.onnx")

    new_graph = ts.optimize(graph, alpha=1.05, budget=100)
    after_model = ts.export_onnx(new_graph)
    onnx.save(after_model, "./onnx_models/bert_taso.onnx")
Beispiel #9
0
    # NOTE(review): fragment — the definition header and the earlier entries
    # of `ts`/`outputs` (indices below start at 6) are above this view.
    ts.append(graph.maxpool2d(input=cur, kernels=(3,3), strides=(2,2), padding="SAME"))
    ts.append(seperable_conv(graph, input=outputs[0], out_channels=out_channels,
              kernels=(3,3), strides=(1,1), padding="SAME"))
    outputs.append(graph.add(ts[6], ts[7]))
    ts.append(graph.avgpool2d(input=outputs[0], kernels=(3,3), strides=(1,1), padding="SAME"))
    ts.append(outputs[1])
    outputs.append(graph.add(ts[8], ts[9]))
    # Concatenate every branch output along axis 1 (channels, assuming NCHW
    # — confirm against the graph's layout convention).
    return graph.concat(1, outputs)

# NASNet-A style stem: 7x7/2 convolution plus 3x3/2 max-pool on a
# 224x224 RGB input.
graph = ts.new_graph()
input = graph.new_input(dims=(1,3,224,224))
weight = graph.new_weight(dims=(64,3,7,7))
input = graph.conv2d(input=input, weight=weight, strides=(2,2),
                     padding="SAME", activation="RELU")
input = graph.maxpool2d(input=input, kernels=(3,3), strides=(2,2), padding="SAME")

# Three stacks of five normal cells, each stack followed by a reduction
# cell after the channel count is doubled.
out_channels = 128
for _ in range(3):
    prev, cur = input, input
    for _ in range(5):
        prev, cur = cur, normal_cell(graph, prev, cur, out_channels)
    out_channels *= 2
    input = reduction_cell(graph, prev, cur, out_channels)

# budget=-1 presumably means an unbounded optimizer search — confirm
# against taso.optimize's documentation.
new_graph = ts.optimize(graph, alpha=1.0, budget=-1)
onnx_model = ts.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "nasneta_taso.onnx")
Beispiel #10
0
import taso
import onnx

# Build the example DNN: a left branch of two stacked 3x3 convolutions and
# a right branch of one 1x1 convolution, joined by add + ReLU.
graph = taso.new_graph()
input = graph.new_input(dims=(1,128,56,56))
w1 = graph.new_weight(dims=(128,128,3,3))
w2 = graph.new_weight(dims=(128,128,1,1))
w3 = graph.new_weight(dims=(128,128,3,3))
left = graph.conv2d(input=input, weight=w1, strides=(1,1), padding="SAME", activation="RELU")
# Bug fix: the second conv must consume the first conv's output (`left`),
# not the raw `input` — otherwise the w1 convolution is dead code. This
# matches the canonical TASO README example.
left = graph.conv2d(input=left, weight=w3, strides=(1,1), padding="SAME")
right = graph.conv2d(input=input, weight=w2, strides=(1,1), padding="SAME", activation="RELU")
output = graph.add(left, right)
output = graph.relu(output)

# Optimize the DNN with TASO and export the optimized graph to ONNX.
new_graph = taso.optimize(graph)
onnx_model = taso.export_onnx(new_graph)
onnx.save(onnx_model, "arbitrary_DNN.onnx")
Beispiel #11
0
def inception_logits(graph, v):
    """Logits head: collapse spatial dims with global average pooling."""
    pooled = pool2d(graph, v, pool_type='global_avg')
    return pooled


def inception_v3(batch_size=1):
    """Assemble the Inception-v3 TASO graph for `batch_size` and return it."""
    graph = taso.new_graph()
    v = graph.new_input(dims=(batch_size, 3, 299, 299))
    v = inception_front(graph, v)
    # Stage A: three A-blocks with growing pool-branch widths.
    for width in (32, 64, 64):
        v = inception_a(graph, v, width)
    v = inception_b(graph, v)
    # Stage C: four C-blocks with growing channel counts.
    for channels in (128, 160, 160, 192):
        v = inception_c(graph, v, channels)
    v = inception_d(graph, v)
    v = inception_e(graph, v)
    v = inception_e(graph, v)
    v = inception_logits(graph, v)
    return graph


# Per the original author: changing batch_size from 4 to 8 caused an error.
graph = inception_v3(batch_size=32)
opt_graph = taso.optimize(graph, alpha=1.0, budget=30)

# Compare runtimes before and after optimization.
print(graph.run_time())
print(opt_graph.run_time())