# Example: profiling a custom torch.autograd.Function with PyProf.
import torch
import torch.cuda.profiler as profiler
import pyprof

# Initialize pyprof
pyprof.init()

class Foo(torch.autograd.Function):
    @staticmethod
    def forward(ctx, in1, in2):
        out = in1 + in2    # This could be a custom C/C++ function.
        return out

    @staticmethod
    def backward(ctx, grad):
        in1_grad = grad    # This could be a custom C/C++ function.
        in2_grad = grad    # This could be a custom C/C++ function.
        return in1_grad, in2_grad

# Hook the forward and backward functions to pyprof
pyprof.wrap(Foo, 'forward')
pyprof.wrap(Foo, 'backward')

foo = Foo.apply

x = torch.ones(4, 4).cuda()
y = torch.ones(4, 4).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = foo(x, y)
    profiler.stop()
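# For orientation only: a rough hand-rolled sketch of the kind of NVTX
# bracketing that a wrapped call produces, using the public torch.cuda.nvtx
# API. This is an illustrative assumption, not PyProf's actual implementation,
# and the range name "Foo.forward" is arbitrary. It relies on the `import torch`
# above.
def add_with_nvtx(in1, in2):
    torch.cuda.nvtx.range_push("Foo.forward")   # open a named NVTX range
    out = in1 + in2                             # the work being profiled
    torch.cuda.nvtx.range_pop()                 # close the range
    return out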
# Example: profiling a module compiled with torch.jit.trace.
import torch
import torch.cuda.profiler as profiler
import pyprof

class Foo(torch.nn.Module):
    def __init__(self, size):
        super(Foo, self).__init__()
        self.n = torch.nn.Parameter(torch.ones(size))
        self.m = torch.nn.Parameter(torch.ones(size))

    def forward(self, input):
        return self.n * input + self.m

foo = Foo(4)
foo.cuda()
x = torch.ones(4).cuda()

# JIT the class using tracing
traced_foo = torch.jit.trace(foo, x)

# Initialize pyprof after the JIT step
pyprof.init()

# Assign a name to the object "traced_foo"
traced_foo.__dict__['__name__'] = "foo"

# Hook up the forward function to pyprof
pyprof.wrap(traced_foo, 'forward')

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = traced_foo(x)
    profiler.stop()

print(z)
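# Optional sanity check (an addition, not part of the original example):
# the traced module should reproduce the eager module's output for this input.
assert torch.allclose(traced_foo(x), foo(x))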
# Example: profiling apex FusedLayerNorm by wrapping the fused_layer_norm_cuda extension.
import torch
import fused_layer_norm_cuda
from apex.normalization import FusedLayerNorm
import pyprof

pyprof.init()
pyprof.wrap(fused_layer_norm_cuda, 'forward')
pyprof.wrap(fused_layer_norm_cuda, 'backward')
pyprof.wrap(fused_layer_norm_cuda, 'forward_affine')
pyprof.wrap(fused_layer_norm_cuda, 'backward_affine')

input = torch.randn(20, 5, 10, 10).cuda()

# With Learnable Parameters
m = FusedLayerNorm(input.size()[1:]).cuda()
output = m(input)

# Without Learnable Parameters
m = FusedLayerNorm(input.size()[1:], elementwise_affine=False).cuda()
output = m(input)

# Normalize over last two dimensions
m = FusedLayerNorm([10, 10]).cuda()
output = m(input)
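# Optional (an addition, not part of the original example): run a backward
# pass so the wrapped 'backward'/'backward_affine' entry points are exercised
# as well, not just the forward ones. Uses the last `m` and `input` defined above.
output = m(input)
output.backward(torch.ones_like(output))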
# Example: profiling apex FusedAdam by wrapping the fused multi-tensor Adam kernel from amp_C.
import torch
from apex.optimizers import FusedAdam
import amp_C
import pyprof

pyprof.init()

# Wrap the custom fused multi tensor Adam implementation
pyprof.wrap(amp_C, 'multi_tensor_adam')

inp = 1024
hid = 2048
out = 4096
batch = 128

# Model
model = torch.nn.Sequential(
    torch.nn.Linear(inp, hid).cuda().half(),
    torch.nn.ReLU(),
    torch.nn.Linear(hid, out).cuda().half())

# Loss
criterion = torch.nn.CrossEntropyLoss().cuda()

# Adam optimizer
optimizer = FusedAdam(model.parameters())

# Input
x = torch.ones(batch, inp).cuda().half()

# Target
target = torch.empty(batch, dtype=torch.long).random_(out).cuda()

with torch.autograd.profiler.emit_nvtx():
    y = model(x)
    loss = criterion(y, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Example: profiling a function compiled with torch.jit.script.
import torch
import torch.cuda.profiler as profiler
import pyprof

# The following creates an object "foo" of type ScriptModule
# The new object has a function called "forward"
@torch.jit.script
def foo(x, y):
    return torch.sigmoid(x) + y

# Initialize pyprof after the JIT step
pyprof.init()

# Assign a name to the object "foo"
foo.__name__ = "foo"

# Hook up the forward function to pyprof
pyprof.wrap(foo, 'forward')

x = torch.zeros(4, 4).cuda()
y = torch.ones(4, 4).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    z = foo(x, y)
    profiler.stop()

print(z)
# Example: profiling apex FusedAdam with FP16_Optimizer by wrapping fused_adam_cuda.
import torch
import fused_adam_cuda
from apex.optimizers import FusedAdam, FP16_Optimizer
import pyprof

pyprof.init()
pyprof.wrap(fused_adam_cuda, 'adam')

model = torch.nn.Linear(10, 20).cuda().half()
criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = FusedAdam(model.parameters())
optimizer = FP16_Optimizer(optimizer)

x = torch.ones(32, 10).cuda().half()
target = torch.empty(32, dtype=torch.long).random_(20).cuda()
y = model(x)
loss = criterion(y, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
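# Optional (an addition, not part of the original example): unlike the earlier
# examples, this one does not limit the capture window. To profile only the
# training step, the same emit_nvtx plus profiler.start()/profiler.stop()
# pattern used above can be applied, e.g. by repeating the step:
import torch.cuda.profiler as profiler

with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    y = model(x)
    loss = criterion(y, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    profiler.stop()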