Example #1
    def iterate(self, src, tgt, update=True, training=True):
        """
        Performs one iteration of the training/validation.

        :param src: batch of examples from the source language
        :param tgt: batch of examples from the target language
        :param update: if True: optimizer does update of the weights
        :param training: if True: executes optimizer
        """
        pyprof2.init()  # patch PyTorch functions to emit NVTX markers
        src, src_length = src
        tgt, tgt_length = tgt
        src = src.to(self.device)
        tgt = tgt.to(self.device)
        src_length = src_length.to(self.device)

        num_toks = {}
        num_toks['tgt'] = int(sum(tgt_length - 1))  # model scores tgt[1:], hence the -1
        num_toks['src'] = int(sum(src_length))

        with torch.autograd.profiler.emit_nvtx():
            profiler.start()  # capture only this region of the run

            if self.batch_first:
                output = self.model(src, src_length, tgt[:, :-1])
                tgt_labels = tgt[:, 1:]
                T, B = output.size(1), output.size(0)
            else:
                output = self.model(src, src_length, tgt[:-1])
                tgt_labels = tgt[1:]
                T, B = output.size(0), output.size(1)

            loss = self.criterion(output.view(T * B, -1),
                                  tgt_labels.contiguous().view(-1))

            loss_per_batch = loss.item()
            loss /= (B * self.iter_size)

            if training:
                self.fp_optimizer.step(loss, self.optimizer, self.scheduler,
                                       update)

            loss_per_token = loss_per_batch / num_toks['tgt']
            loss_per_sentence = loss_per_batch / B

            profiler.stop()

        # One profiled iteration is enough; end the run here.
        print('You can stop now')
        exit()

        return loss_per_token, loss_per_sentence, num_toks
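For context, pyprof2 works by annotating PyTorch calls with NVTX ranges; a script like the one above is run under an external profiler (e.g. nvprof -f -o net.sql --profile-from-start off -- python net.py), with profiler.start()/profiler.stop() bounding the region that is actually captured, and the resulting profile is then post-processed with pyprof2's parse/prof scripts. A minimal, self-contained sketch of the same pattern; the Linear model here is a stand-in and not part of the original example:

# Minimal sketch of the profiling pattern used above; assumes a CUDA GPU
# and pyprof2 installed, run under nvprof with --profile-from-start off.
import torch
import torch.cuda.profiler as profiler
import pyprof2

pyprof2.init()  # monkey-patch torch ops to emit NVTX markers

model = torch.nn.Linear(1024, 1024).cuda()
x = torch.rand(64, 1024).cuda()

with torch.autograd.profiler.emit_nvtx():
    profiler.start()     # begin capture: only this region is profiled
    y = model(x)
    loss = y.sum()
    loss.backward()
    profiler.stop()      # end capture after a single iteration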
Example #2
# Imports reconstructed for this excerpt; parseArgs() and the model
# table `d` are defined elsewhere in the original script.
import torch
import torch.nn as nn
import torch.cuda.profiler as profiler
import torchvision.models as models

import pyprof2
import fused_adam_cuda                  # apex CUDA extension
from apex.optimizers import FusedAdam

def main():
	args = parseArgs()

	pyprof2.init()
	pyprof2.wrap(fused_adam_cuda, 'adam')  # NVTX-mark the fused adam kernel launcher

	N = args.b                # batch size
	C = 3                     # input channels
	H = d[args.m]['H']        # input height for the selected model
	W = d[args.m]['W']        # input width
	opts = d[args.m]['opts']  # extra model constructor options
	classes = 1000

	net = getattr(models, args.m)
	net = net(**opts).cuda().half()
	net.train()

	x = torch.rand(N, C, H, W).cuda().half()
	target = torch.empty(N, dtype=torch.long).random_(classes).cuda()

	criterion = nn.CrossEntropyLoss().cuda()
	if args.o == "sgd":
		optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
	elif args.o == "adam":
		optimizer = FusedAdam(net.parameters())
		#optimizer = FP16_Optimizer(optimizer)
	else:
		assert False

	# Warm up without the profiler so one-time CUDA/cuDNN setup cost stays out of the trace
	for i in range(2):
		output = net(x)
		loss = criterion(output, target)
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

	with torch.autograd.profiler.emit_nvtx():
		profiler.start()
		output = net(x)
		loss = criterion(output, target)
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		profiler.stop()

if __name__ == "__main__":
	main()
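The pyprof2.wrap call above registers an NVTX marker for a function that pyprof2.init() does not patch on its own (here the apex fused_adam_cuda.adam kernel launcher). A sketch of the same idea applied to user code; mykernels and fused_norm are hypothetical placeholders, not a real library:

# Sketch: make calls into a custom extension visible in the profile.
# `mykernels.fused_norm` is a hypothetical module/function pair.
import pyprof2
import mykernels

pyprof2.init()
pyprof2.wrap(mykernels, 'fused_norm')  # NVTX-mark every fused_norm call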
Example #3
#!/usr/bin/env python3
"""
This file checks all Python operators.
"""

import sys
import torch
import torch.cuda.profiler as profiler
import operator
import inspect

# Import and initialize pyprof2
import pyprof2

pyprof2.init()

X = 1024
Y = 1024

# Float tensors
fa = torch.rand(X, Y).cuda()
fb = torch.rand(X, Y).cuda()
fc = torch.rand(X, Y).cuda()

# Integer tensors
ia = torch.randint(0, 100, (X, Y)).cuda()
ib = torch.randint(0, 100, (X, Y)).cuda()

# Scalar-shaped (1x1) tensors
sa = torch.ones(1, 1).cuda()
sb = torch.ones(1, 1).cuda()

# Byte (uint8) tensor
ba = fa.byte()
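The excerpt ends with the tensor setup; the remainder of the original file is not shown. A hypothetical continuation, assuming it exercises the operators inside the same emit_nvtx/start/stop pattern as the other examples:

# Hypothetical continuation: run a few operators on the tensors above
# inside the profiled region. The exact operator list checked by the
# original script is not part of this excerpt.
with torch.autograd.profiler.emit_nvtx():
    profiler.start()
    fd = fa + fb               # float elementwise add
    fd = operator.mul(fa, fc)  # operator-module call resolves to the same op
    ic = ia & ib               # integer bitwise and
    ss = sa * sb               # scalar-shaped (1x1) tensors
    bb = ~ba                   # bitwise not on a uint8 tensor
    profiler.stop()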