Example #1
import torch
import torch.nn as nn
import torch.cuda.profiler as profiler
import torchvision.models as models  # assumed source of the model constructors used below
import fused_adam_cuda
from apex.optimizers import FusedAdam, FP16_Optimizer
import pyprof2

# parseArgs() and the per-model lookup table d are not part of this snippet;
# a sketch of what they might look like follows the example.


def main():
	args = parseArgs()

	pyprof2.init()
	pyprof2.wrap(fused_adam_cuda, 'adam')

	N = args.b
	C = 3
	H = d[args.m]['H']
	W = d[args.m]['W']
	opts = d[args.m]['opts']
	classes = 1000

	net = getattr(models, args.m)
	net = net(**opts).cuda().half()
	net.train()

	x = torch.rand(N, C, H, W).cuda().half()
	target = torch.empty(N, dtype=torch.long).random_(classes).cuda()

	criterion = nn.CrossEntropyLoss().cuda()
	if (args.o == "sgd"):
		optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
	elif (args.o == "adam"):
		optimizer = FusedAdam(net.parameters())
		#optimizer = FP16_Optimizer(optimizer)
	else:
		assert False

	#Warm up without profiler
	for i in range(2):
		output = net(x)
		loss = criterion(output, target)
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

	with torch.autograd.profiler.emit_nvtx():
		profiler.start()
		output = net(x)
		loss = criterion(output, target)
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		profiler.stop()


if __name__ == "__main__":
	main()
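Example #1 relies on a parseArgs() helper and a per-model table d that are not part of the snippet. A minimal sketch of what they might contain, inferred only from how main() uses args.b, args.m, args.o, and d[args.m]; the flag names, defaults, and model entries are assumptions:

import argparse

def parseArgs():
	# Hypothetical parser exposing the three flags main() reads.
	parser = argparse.ArgumentParser()
	parser.add_argument('-b', type=int, default=32, help='batch size')
	parser.add_argument('-m', type=str, default='resnet50', help='model name in torchvision.models')
	parser.add_argument('-o', type=str, default='adam', choices=['sgd', 'adam'], help='optimizer')
	return parser.parse_args()

# Hypothetical per-model input sizes and constructor options.
d = {
	'resnet50':     {'H': 224, 'W': 224, 'opts': {}},
	'inception_v3': {'H': 299, 'W': 299, 'opts': {'aux_logits': False}},
}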
Example #2
import torch
import torch.cuda.profiler as profiler
import pyprof2


def foo(x, y):
    return torch.sigmoid(x) + y


x = torch.zeros(4, 4).cuda()
y = torch.ones(4, 4).cuda()

#JIT the function using tracing
#This returns an object of type ScriptModule with a forward method.
traced_foo = torch.jit.trace(foo, (x, y))

#Initialize pyprof2 after the JIT step
pyprof2.init()

#Assign a name to the object "traced_foo"
traced_foo.__dict__['__name__'] = "foo"

#Hook up the forward function to pyprof2
pyprof2.wrap(traced_foo, 'forward')

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = traced_foo(x, y)
    profiler.stop()
    print(z)
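To also capture the backward kernels of the traced function, the same region can be extended with a backward pass. A minimal sketch; the requires_grad input and the sum() reduction are only for illustration:

x = torch.zeros(4, 4, device='cuda', requires_grad=True)
y = torch.ones(4, 4, device='cuda')

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = traced_foo(x, y)
    z.sum().backward()   # backward ops are annotated by emit_nvtx as well
    profiler.stop()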
Example #3
import torch
import fused_adam_cuda
from apex.optimizers import FusedAdam, FP16_Optimizer
import pyprof2

pyprof2.init()
pyprof2.wrap(fused_adam_cuda, 'adam')

model = torch.nn.Linear(10, 20).cuda().half()
criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = FusedAdam(model.parameters())
optimizer = FP16_Optimizer(optimizer)

x = torch.ones(32, 10).cuda().half()
target = torch.empty(32, dtype=torch.long).random_(20).cuda()
y = model(x)
loss = criterion(y, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
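Example #3 sets up the wrapping but, unlike the earlier examples, does not show a profiling region. A minimal sketch of capturing one training step in the same way; the torch.cuda.profiler import is assumed:

import torch.cuda.profiler as profiler

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    y = model(x)
    loss = criterion(y, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()   # the wrapped fused_adam_cuda.adam call runs in here
    profiler.stop()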
Example #4
import torch
import fused_layer_norm_cuda
from apex.normalization import FusedLayerNorm
import pyprof2

pyprof2.init()
pyprof2.wrap(fused_layer_norm_cuda, 'forward')
pyprof2.wrap(fused_layer_norm_cuda, 'backward')
pyprof2.wrap(fused_layer_norm_cuda, 'forward_affine')
pyprof2.wrap(fused_layer_norm_cuda, 'backward_affine')

input = torch.randn(20, 5, 10, 10).cuda()

# With Learnable Parameters
m = FusedLayerNorm(input.size()[1:]).cuda()
output = m(input)

# Without Learnable Parameters
m = FusedLayerNorm(input.size()[1:], elementwise_affine=False).cuda()
output = m(input)

# Normalize over last two dimensions
m = FusedLayerNorm([10, 10]).cuda()
output = m(input)

# Normalize over last dimension of size 10
m = FusedLayerNorm(10).cuda()
output = m(input)
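As with Example #3, no profiling region is shown. A minimal sketch of capturing one FusedLayerNorm forward/backward pass under the usual region; the profiler import and the requires_grad/sum() details are only for illustration:

import torch.cuda.profiler as profiler

input = torch.randn(20, 5, 10, 10, device='cuda', requires_grad=True)
m = FusedLayerNorm(input.size()[1:]).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    output = m(input)
    output.sum().backward()   # exercises the wrapped forward/backward (affine) kernels
    profiler.stop()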
Example #5
#!/usr/bin/env python3

import torch
import torch.cuda.profiler as profiler
import pyprof2

pyprof2.init()


class Foo(torch.nn.Module):
    def __init__(self, size):
        super(Foo, self).__init__()
        self.n = torch.nn.Parameter(torch.ones(size))
        self.m = torch.nn.Parameter(torch.ones(size))

    def forward(self, input):
        return self.n * input + self.m


#Hook the forward function to pyprof2
pyprof2.wrap(Foo, 'forward')

foo = Foo(4)
foo.cuda()
x = torch.ones(4).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = foo(x)
    profiler.stop()
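Because the wrap in Example #5 is applied to the class rather than an instance, every Foo object should carry the marker. A small sketch of profiling a second instance in the same region, assuming class-level patching behaves this way; no additional wrap calls are made:

bar = Foo(4).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = foo(x)
    w = bar(x)   # also marked, since Foo.forward itself was wrapped
    profiler.stop()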
Example #6
import torch
from apex.optimizers import FusedAdam
import amp_C
import pyprof2

pyprof2.init()
# Wrap the custom fused multi tensor Adam implementation
pyprof2.wrap(amp_C, 'multi_tensor_adam')

inp = 1024
hid = 2048
out = 4096
batch = 128

# Model
model = torch.nn.Sequential(
    torch.nn.Linear(inp, hid).cuda().half(), torch.nn.ReLU(),
    torch.nn.Linear(hid, out).cuda().half())
# Loss
criterion = torch.nn.CrossEntropyLoss().cuda()
# Adam optimizer
optimizer = FusedAdam(model.parameters())
# Input
x = torch.ones(batch, inp).cuda().half()
# Target
target = torch.empty(batch, dtype=torch.long).random_(out).cuda()
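
# Warm up without the profiler, as in Example #1, so that one-time setup cost
# stays out of the profile (sketch; the iteration count is arbitrary).
for i in range(2):
    y = model(x)
    loss = criterion(y, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()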

with torch.autograd.profiler.emit_nvtx():
    y = model(x)
    loss = criterion(y, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()