예제 #1
0
    def feed_data(self, data_loader, training=True):
        if training:
            assert self.optimizer is not None
        batch_size = data_loader.batch_size

        losses_per_token = AverageMeter()
        loss_per_sentence = AverageMeter()
        num_toks = AverageMeter()

        start_time = 0
        end_time = 0
        t0 = time.time()
        for i, (src, tgt, _) in enumerate(data_loader):
            if i == self.profile_start:
                if self.cupti:
                    start_cupti_tracing()
                elif self.nsight:
                    torch.cuda.profiler.start()
                start_time = time.time()

            if i == self.profile_stop:
                end_time = time.time()
                if self.cupti:
                    end_cupti_tracing()
                elif self.nsight:
                    torch.cuda.profiler.stop()

            if i == self.profile_stop:
                break
            self.save_counter += 1
            # measure data loading time

            # do a train/evaluate iteration
            stats = self.iterate(src, tgt, training=training)
            loss_per_token, loss_per_sentence, num_toks = stats

            save_chkpt = (self.save_counter %
                          self.save_freq) == (self.save_freq - 1)
            if training and save_chkpt:
                self.save_counter = 0
                self.save_info['iteration'] = i
                identifier = next(self.checkpoint_counter, -1)
                if identifier != -1:
                    with sync_workers() as rank:
                        if rank == 0:
                            self.save(identifier=identifier)
            t1 = time.time()
            print("iteration {}: {} ms".format(i, (t1 - t0) * 1000))
            t0 = t1
        print("average time {:.2f} ms".format(
            (end_time - start_time) * 1000 /
            (self.profile_stop - self.profile_start)))
        return losses_per_token.avg
예제 #2
0
    def feed_data(self, data_loader, training=True):
        if training:
            assert self.optimizer is not None
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses_per_token = AverageMeter()
        losses_per_sentence = AverageMeter()

        tot_tok_time = AverageMeter()
        src_tok_time = AverageMeter()
        tgt_tok_time = AverageMeter()

        batch_size = data_loader.batch_size

        end = time.time()
        for i, (src, tgt, _) in enumerate(data_loader):
            print("iteration {}".format(i))
            if i >= self.num_steps and self.num_steps > 0:
                break
            if i == 5 and self.cupti:
                start_cupti_tracing()
            self.save_counter += 1
            # measure data loading time
            data_time.update(time.time() - end)

            # do a train/evaluate iteration
            stats = self.iterate(src, tgt, training=training)
            loss_per_token, loss_per_sentence, num_toks = stats

            # measure accuracy and record loss
            losses_per_token.update(loss_per_token, num_toks['tgt'])
            losses_per_sentence.update(loss_per_sentence, batch_size)

            # measure elapsed time
            elapsed = time.time() - end
            batch_time.update(elapsed)
            src_tok_time.update(num_toks['src'] / elapsed)
            tgt_tok_time.update(num_toks['tgt'] / elapsed)
            tot_num_toks = num_toks['tgt'] + num_toks['src']
            tot_tok_time.update(tot_num_toks / elapsed)
            self.loss = losses_per_token.avg
            end = time.time()

            if i % self.print_freq == 0:
                phase = 'TRAIN' if training else 'EVAL'
                log = []
                log += ['{} [{}][{}/{}]'.format(phase, self.epoch, i, len(data_loader))]
                log += ['Time {:.3f} ({:.3f})'.format(batch_time.val, batch_time.avg)]
                log += ['Data {:.3f} ({:.3f})'.format(data_time.val, data_time.avg)]
                log += ['Tok/s {:.0f} ({:.0f})'.format(tot_tok_time.val, tot_tok_time.avg)]
                if self.verbose:
                    log += ['Src tok/s {:.0f} ({:.0f})'.format(src_tok_time.val, src_tok_time.avg)]
                    log += ['Tgt tok/s {:.0f} ({:.0f})'.format(tgt_tok_time.val, tgt_tok_time.avg)]
                    log += ['Loss/sentence {:.1f} ({:.1f})'.format(losses_per_sentence.val, losses_per_sentence.avg)]
                log += ['Loss/tok {:.8f} ({:.8f})'.format(losses_per_token.val, losses_per_token.avg)]
                log = '\t'.join(log)
                print(log)
                #logging.info(log)

            save_chkpt = (self.save_counter % self.save_freq) == (self.save_freq - 1)
            if training and save_chkpt:
                self.save_counter = 0
                self.save_info['iteration'] = i
                identifier = next(self.checkpoint_counter, -1)
                if identifier != -1:
                    with sync_workers() as rank:
                        if rank == 0:
                            self.save(identifier=identifier)
        if self.cupti:
            end_cupti_tracing()
        return losses_per_token.avg
예제 #3
0
    def feed_data(self, data_loader, training=True):
        """
        Runs training or validation on batches from data_loader.

        :param data_loader: data loader
        :param training: if True runs training else runs validation
        """
        if training:
            assert self.optimizer is not None
            eval_fractions = np.linspace(0, 1, self.intra_epoch_eval + 2)[1:-1]
            iters_with_update = len(data_loader) // self.iter_size
            eval_iters = (eval_fractions * iters_with_update).astype(int)
            eval_iters = eval_iters * self.iter_size
            eval_iters = set(eval_iters)

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses_per_token = AverageMeter(skip_first=False)
        losses_per_sentence = AverageMeter(skip_first=False)

        tot_tok_time = AverageMeter()
        src_tok_time = AverageMeter()
        tgt_tok_time = AverageMeter()

        batch_size = data_loader.batch_size

        end = time.time()
        for i, (src, tgt) in enumerate(data_loader):
            self.save_counter += 1
            # measure data loading time
            data_time.update(time.time() - end)

            update = False
            if i % self.iter_size == self.iter_size - 1:
                update = True

            # do a train/evaluate iteration
            stats = self.iterate(src, tgt, update, training=training)
            loss_per_token, loss_per_sentence, num_toks = stats

            # measure accuracy and record loss
            losses_per_token.update(loss_per_token, num_toks['tgt'])
            losses_per_sentence.update(loss_per_sentence, batch_size)

            # measure elapsed time
            elapsed = time.time() - end
            batch_time.update(elapsed)
            src_tok_time.update(num_toks['src'] / elapsed)
            tgt_tok_time.update(num_toks['tgt'] / elapsed)
            tot_num_toks = num_toks['tgt'] + num_toks['src']
            tot_tok_time.update(tot_num_toks / elapsed)
            self.loss = losses_per_token.avg

            if training and i in eval_iters:
                test_bleu, _ = self.translator.run(calc_bleu=True,
                                                   epoch=self.epoch,
                                                   iteration=i)

                log = []
                log += [f'TRAIN [{self.epoch}][{i}/{len(data_loader)}]']
                log += [f'BLEU: {test_bleu:.2f}']
                log = '\t'.join(log)
                logging.info(log)

                self.model.train()
                self.preallocate(data_loader, training=True)

            if i % self.print_freq == 0:
                phase = 'TRAIN' if training else 'VALIDATION'
                log = []
                log += [f'{phase} [{self.epoch}][{i}/{len(data_loader)}]']
                log += [f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})']
                log += [f'Data {data_time.val:.2e} ({data_time.avg:.2e})']
                log += [
                    f'Tok/s {tot_tok_time.val:.0f} ({tot_tok_time.avg:.0f})'
                ]
                if self.verbose:
                    log += [
                        f'Src tok/s {src_tok_time.val:.0f} ({src_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Tgt tok/s {tgt_tok_time.val:.0f} ({tgt_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Loss/sentence {losses_per_sentence.val:.1f} ({losses_per_sentence.avg:.1f})'
                    ]
                log += [
                    f'Loss/tok {losses_per_token.val:.4f} ({losses_per_token.avg:.4f})'
                ]
                if training:
                    lr = self.optimizer.param_groups[0]['lr']
                    log += [f'LR {lr:.3e}']
                log = '\t'.join(log)
                logging.info(log)

            save_chkpt = (self.save_counter %
                          self.save_freq) == (self.save_freq - 1)
            if training and save_chkpt:
                self.save_counter = 0
                self.save_info['iteration'] = i
                identifier = next(self.checkpoint_counter, -1)
                if identifier != -1:
                    with sync_workers() as rank:
                        if rank == 0:
                            self.save(identifier=identifier)

            end = time.time()

        tot_tok_time.reduce('sum')
        losses_per_token.reduce('mean')

        return losses_per_token.avg, tot_tok_time.avg
예제 #4
0
    def feed_data(self, data_loader, training=True):
        if training:
            assert self.optimizer is not None
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses_per_token = AverageMeter()
        losses_per_sentence = AverageMeter()

        tot_tok_time = AverageMeter()
        src_tok_time = AverageMeter()
        tgt_tok_time = AverageMeter()

        batch_size = data_loader.batch_size

        end = time.time()
        for i, (src, tgt, _) in enumerate(data_loader):
            self.save_counter += 1
            # measure data loading time
            data_time.update(time.time() - end)

            # do a train/evaluate iteration
            stats = self.iterate(src, tgt, training=training)
            loss_per_token, loss_per_sentence, num_toks = stats

            # measure accuracy and record loss
            losses_per_token.update(loss_per_token, num_toks['tgt'])
            losses_per_sentence.update(loss_per_sentence, batch_size)

            # measure elapsed time
            elapsed = time.time() - end
            batch_time.update(elapsed)
            src_tok_time.update(num_toks['src'] / elapsed)
            tgt_tok_time.update(num_toks['tgt'] / elapsed)
            tot_num_toks = num_toks['tgt'] + num_toks['src']
            tot_tok_time.update(tot_num_toks / elapsed)
            self.loss = losses_per_token.avg
            end = time.time()

            if i % self.print_freq == 0:
                phase = 'TRAIN' if training else 'EVAL'
                log = []
                log += [f'{phase} [{self.epoch}][{i}/{len(data_loader)}]']
                log += [f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})']
                log += [f'Data {data_time.val:.3f} ({data_time.avg:.3f})']
                log += [
                    f'Tok/s {tot_tok_time.val:.0f} ({tot_tok_time.avg:.0f})'
                ]
                if self.verbose:
                    log += [
                        f'Src tok/s {src_tok_time.val:.0f} ({src_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Tgt tok/s {tgt_tok_time.val:.0f} ({tgt_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Loss/sentence {losses_per_sentence.val:.1f} ({losses_per_sentence.avg:.1f})'
                    ]
                log += [
                    f'Loss/tok {losses_per_token.val:.8f} ({losses_per_token.avg:.8f})'
                ]
                log = '\t'.join(log)
                logging.info(log)

            save_chkpt = (self.save_counter %
                          self.save_freq) == (self.save_freq - 1)
            if training and save_chkpt:
                self.save_counter = 0
                self.save_info['iteration'] = i
                identifier = next(self.checkpoint_counter, -1)
                if identifier != -1:
                    with sync_workers() as rank:
                        if rank == 0:
                            self.save(identifier=identifier)

        return losses_per_token.avg
예제 #5
0
    def feed_data(self, data_loader, training=True):
        """
        Runs training or validation on batches from data_loader.

        :param data_loader: data loader
        :param training: if True runs training else runs validation
        """
        if training:
            assert self.optimizer is not None
            eval_fractions = np.linspace(0, 1, self.intra_epoch_eval + 2)[1:-1]
            eval_iters = (eval_fractions * len(data_loader)).astype(int)
            eval_iters = set(eval_iters)

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses_per_token = AverageMeter()
        losses_per_sentence = AverageMeter()

        tot_tok_time = AverageMeter()
        src_tok_time = AverageMeter()
        tgt_tok_time = AverageMeter()

        batch_size = data_loader.batch_size
        layer_timestamps = []
        verbose = True

        module_whitelist = ["EmuBidirLSTM", "RecurrentAttention", "Classifier"]

        for i, (src, tgt) in enumerate(data_loader):
            break
        (src, src_length) = src
        (tgt, tgt_length) = tgt
        src_length = torch.LongTensor(src_length).cuda()
        src = src.cuda()
        tgt = tgt.cuda()
        model_input = (src, src_length, tgt[:-1])
        summary = torchsummary.summary(model=self.model,
                                       module_whitelist=module_whitelist,
                                       model_input=model_input,
                                       verbose=True)

        end = time.time()
        NUM_STEPS_TO_PROFILE = 100  # profile 100 steps
        for i, (src, tgt) in enumerate(data_loader):
            self.save_counter += 1
            # measure data loading time
            data_time.update(time.time() - end)

            with torchprofiler.Profiling(self.model, module_whitelist) as p:
                # do a train/evaluate iteration
                stats = self.iterate(src, tgt, training=training)
                loss_per_token, loss_per_sentence, num_toks = stats
            print(str(p))
            layer_timestamps.append(p.processed_times())

            # measure accuracy and record loss
            losses_per_token.update(loss_per_token, num_toks['tgt'])
            losses_per_sentence.update(loss_per_sentence, batch_size)

            # measure elapsed time
            elapsed = time.time() - end
            batch_time.update(elapsed)
            src_tok_time.update(num_toks['src'] / elapsed)
            tgt_tok_time.update(num_toks['tgt'] / elapsed)
            tot_num_toks = num_toks['tgt'] + num_toks['src']
            tot_tok_time.update(tot_num_toks / elapsed)
            self.loss = losses_per_token.avg

            if training and i in eval_iters:
                test_bleu, _ = self.translator.run(calc_bleu=True,
                                                   epoch=self.epoch,
                                                   iteration=i)

                log = []
                log += [f'TRAIN [{self.epoch}][{i}/{len(data_loader)}]']
                log += [f'BLEU: {test_bleu:.2f}']
                log = '\t'.join(log)
                logging.info(log)

                self.model.train()
                self.preallocate(data_loader, training=True)

            if i % self.print_freq == 0:
                phase = 'TRAIN' if training else 'VALIDATION'
                log = []
                log += [f'{phase} [{self.epoch}][{i}/{len(data_loader)}]']
                log += [f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})']
                log += [f'Data {data_time.val:.5f} ({data_time.avg:.5f})']
                log += [
                    f'Tok/s {tot_tok_time.val:.0f} ({tot_tok_time.avg:.0f})'
                ]
                if self.verbose:
                    log += [
                        f'Src tok/s {src_tok_time.val:.0f} ({src_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Tgt tok/s {tgt_tok_time.val:.0f} ({tgt_tok_time.avg:.0f})'
                    ]
                    log += [
                        f'Loss/sentence {losses_per_sentence.val:.1f} ({losses_per_sentence.avg:.1f})'
                    ]
                log += [
                    f'Loss/tok {losses_per_token.val:.4f} ({losses_per_token.avg:.4f})'
                ]
                lr = [
                    param_group['lr']
                    for param_group in self.optimizer.param_groups
                ]
                log += [f'Learning Rate {lr}']
                log = '\t'.join(log)
                logging.info(log)

            if i >= NUM_STEPS_TO_PROFILE:
                break

            save_chkpt = (self.save_counter %
                          self.save_freq) == (self.save_freq - 1)
            if training and save_chkpt:
                self.save_counter = 0
                self.save_info['iteration'] = i
                identifier = next(self.checkpoint_counter, -1)
                if identifier != -1:
                    with sync_workers() as rank:
                        if rank == 0:
                            self.save(identifier=identifier)

            end = time.time()

        if verbose:
            print(
                "\n==========================================================")
            print("Layer Type    Forward Time (ms)    Backward Time (ms)")
            print("==========================================================")

        tot_accounted_time = 0.0
        per_layer_times = []
        for i in range(len(layer_timestamps[0])):
            layer_type = str(layer_timestamps[0][i][0])
            layer_forward_time_sum = 0.0
            layer_backward_time_sum = 0.0
            for j in range(len(layer_timestamps)):
                layer_forward_time_sum += (layer_timestamps[j][i][2] / 1000)
                layer_backward_time_sum += (layer_timestamps[j][i][5] / 1000)
            per_layer_times.append(
                (layer_type, layer_forward_time_sum / len(layer_timestamps),
                 layer_backward_time_sum / len(layer_timestamps)))
            if verbose:
                print(per_layer_times[-1][0], per_layer_times[-1][1],
                      per_layer_times[-1][2])
            tot_accounted_time += (per_layer_times[-1][1] +
                                   per_layer_times[-1][2])

        print("Total accounted time: %.3f ms" % tot_accounted_time)

        summary_i = 0
        per_layer_times_i = 0
        last_summary_i = -1
        last_per_layer_times_i = -1
        while len(per_layer_times) > 0:
            per_layer_time = per_layer_times.pop(0)
            for summary_i in range(len(summary)):
                summary_elem = summary[summary_i]
                if str(summary_elem['layer_name']) != str(per_layer_time[0]):
                    continue
                if 'forward_time' in summary_elem and 'backward_time' in summary_elem:
                    continue
                summary_elem['forward_time'] = per_layer_time[1]
                summary_elem['backward_time'] = per_layer_time[2]
                break

        if training:
            create_graph(self.model, module_whitelist, (src, tgt), summary,
                         os.path.join("profiles", self.arch))

        tot_tok_time.reduce('sum')
        losses_per_token.reduce('mean')

        return losses_per_token.avg, tot_tok_time.avg