예제 #1
0
    def train_epoch(self,
                    loader_src,
                    loader_tar_ul,
                    loader_tar_l,
                    optimizer,
                    epoch,
                    augmenter=None,
                    print_stats=1,
                    writer=None,
                    write_images=False,
                    device=0):
        """
        Trains the network for one epoch with a segmentation loss on the
        labeled source data and, when available, on labeled target data.

        :param loader_src: source dataloader (labeled)
        :param loader_tar_ul: target dataloader (unlabeled); not used by this
                              method, kept so the signature matches the other
                              training routines
        :param loader_tar_l: target dataloader (labeled), or None to train on
                             the source data only
        :param optimizer: optimizer for the loss function
        :param epoch: current epoch (used for printing and logging)
        :param augmenter: data augmenter forwarded to augment_samples
        :param print_stats: running averages are printed every print_stats
                            iterations
        :param writer: summary writer; nothing is logged when None
        :param write_images: if truthy, images of the last processed batch are
                             logged as well
        :param device: device forwarded to module_to_device / tensor_to_device
        :return: average total training loss over the epoch (Python float)
        """
        # perform training on GPU/CPU
        module_to_device(self, device)
        self.train()

        use_tar_l = loader_tar_l is not None

        # running sums of the losses, kept as plain Python floats
        loss_seg_src_cum = 0.0
        loss_seg_tar_cum = 0.0
        total_loss_cum = 0.0
        cnt = 0

        # zip dataloaders: iteration stops as soon as the shortest one is exhausted
        if use_tar_l:
            dl = zip(loader_src, loader_tar_l)
        else:
            dl = zip(loader_src)

        # start epoch
        time_start = datetime.datetime.now()
        for i, data in enumerate(dl):

            # transfer to suitable device
            data_src = tensor_to_device(data[0], device)
            if use_tar_l:
                data_tar_l = tensor_to_device(data[1], device)

            # augment if necessary
            # NOTE(review): unlike y_tar_l, y_src is not passed through
            # get_labels here - confirm this is intentional
            x_src, y_src = augment_samples((data_src[0], data_src[1]),
                                           augmenter=augmenter)
            if use_tar_l:
                x_tar_l, y_tar_l = augment_samples(
                    (data_tar_l[0], data_tar_l[1]), augmenter=augmenter)
                y_tar_l = get_labels(y_tar_l, coi=self.coi, dtype=int)

            # zero the gradient buffers
            self.zero_grad()

            # forward prop and compute loss
            y_src_pred = self(x_src)
            loss_seg_src = self.seg_loss(y_src_pred, y_src[:, 0, ...])
            total_loss = loss_seg_src
            # the target loss counts as zero when there is no labeled target data
            loss_seg_tar_val = 0.0
            if use_tar_l:
                y_tar_l_pred = self(x_tar_l)
                loss_seg_tar = self.seg_loss(y_tar_l_pred, y_tar_l[:, 0, ...])
                total_loss = total_loss + loss_seg_tar
                loss_seg_tar_val = loss_seg_tar.item()

            # .item() yields Python floats, so the running sums stay scalars
            # (a 1-element placeholder tensor converted with .numpy() would
            # silently turn them into shape-(1,) arrays)
            loss_seg_src_cum += loss_seg_src.item()
            loss_seg_tar_cum += loss_seg_tar_val
            total_loss_cum += total_loss.item()
            cnt += 1

            # backward prop
            total_loss.backward()

            # apply one step in the optimization
            optimizer.step()

            # print statistics if necessary
            if i % print_stats == 0:
                print(
                    '[%s] Epoch %5d - Iteration %5d/%5d - Loss seg src: %.6f - Loss seg tar: %.6f - Loss: %.6f'
                    % (datetime.datetime.now(), epoch, i,
                       len(loader_src.dataset) / loader_src.batch_size,
                       loss_seg_src_cum / cnt, loss_seg_tar_cum / cnt,
                       total_loss_cum / cnt))

        # keep track of time
        runtime = datetime.datetime.now() - time_start
        seconds = runtime.total_seconds()
        hours = seconds // 3600
        minutes = (seconds - hours * 3600) // 60
        seconds = seconds - hours * 3600 - minutes * 60
        print_frm(
            'Epoch %5d - Runtime for training: %d hours, %d minutes, %f seconds'
            % (epoch, hours, minutes, seconds))

        # don't forget to compute the average and print it
        loss_seg_src_avg = loss_seg_src_cum / cnt
        loss_seg_tar_avg = loss_seg_tar_cum / cnt
        total_loss_avg = total_loss_cum / cnt
        print(
            '[%s] Training Epoch %4d - Loss seg src: %.6f - Loss seg tar: %.6f - Loss: %.6f'
            % (datetime.datetime.now(), epoch, loss_seg_src_avg,
               loss_seg_tar_avg, total_loss_avg))

        # log everything
        if writer is not None:

            # always log scalars
            scalar_tags = ['loss-seg-src', 'loss-seg-tar', 'total-loss']
            log_scalars([loss_seg_src_avg, loss_seg_tar_avg, total_loss_avg],
                        ['train/' + s for s in scalar_tags],
                        writer,
                        epoch=epoch)

            # log images of the last batch if necessary
            if write_images:
                # keep only the channel with index 1 of the softmax output
                y_src_pred = F.softmax(y_src_pred, dim=1)[:, 1:2, :, :].data
                log_images_2d(
                    [x_src.data, y_src.data, y_src_pred],
                    ['train/' + s for s in ['src/x', 'src/y', 'src/y-pred']],
                    writer,
                    epoch=epoch)
                if use_tar_l:
                    y_tar_l_pred = F.softmax(y_tar_l_pred,
                                             dim=1)[:, 1:2, :, :].data
                    image_tags = ['tar/x-l', 'tar/y-l', 'tar/y-l-pred']
                    log_images_2d([x_tar_l.data, y_tar_l, y_tar_l_pred],
                                  ['train/' + s for s in image_tags],
                                  writer,
                                  epoch=epoch)

        return total_loss_avg
예제 #2
0
    def test_epoch(self,
                   loader_src,
                   loader_tar,
                   loss_seg_fn,
                   loss_rec_fn,
                   epoch,
                   writer=None,
                   write_images=False,
                   device=0):
        """
        Tests the network for one epoch on paired source and target batches.

        :param loader_src: source dataloader (should be labeled)
        :param loader_tar: target dataloader (should be labeled)
        :param loss_seg_fn: segmentation loss function
        :param loss_rec_fn: reconstruction loss function
        :param epoch: current epoch (used for printing and logging)
        :param writer: summary writer; nothing is logged when None
        :param write_images: if truthy, images of the last processed batch are
                             logged as well
        :param device: device forwarded to module_to_device / tensor_to_device
        :return: average total testing loss over the epoch
        """
        # perform testing on GPU/CPU
        module_to_device(self, device)
        self.eval()

        # keep track of the average loss and metrics during the epoch
        loss_seg_cum = 0.0
        loss_rec_cum = 0.0
        total_loss_cum = 0.0
        cnt = 0

        # per-sample predictions and labels, collected over the whole epoch
        y_src_preds = []
        ys_src = []
        y_tar_preds = []
        ys_tar = []

        # start epoch; zip stops as soon as the shortest loader is exhausted
        for data in zip(loader_src, loader_tar):
            # get inputs and transfer to suitable device
            x_src, y_src = tensor_to_device(data[0], device)
            x_tar, y_tar = tensor_to_device(data[1], device)
            y_src = get_labels(y_src, coi=self.coi, dtype=int)
            y_tar = get_labels(y_tar, coi=self.coi, dtype=int)
            x_src = x_src.float()
            x_tar = x_tar.float()

            # zero the gradient buffers
            self.zero_grad()

            # forward prop; the reconstruction is read from the attribute that
            # is picked up right after each forward pass
            y_src_pred = self(x_src)
            x_src_pred = self.reconstruction_outputs
            y_tar_pred = self(x_tar)
            x_tar_pred = self.reconstruction_outputs

            # compute loss (the segmentation loss is evaluated on the source only)
            loss_seg = loss_seg_fn(y_src_pred, y_src)
            loss_rec = 0.5 * (loss_rec_fn(x_src_pred, x_src) +
                              loss_rec_fn(x_tar_pred, x_tar))
            total_loss = loss_seg + self.lambda_rec * loss_rec
            loss_seg_cum += loss_seg.data.cpu().numpy()
            loss_rec_cum += loss_rec.data.cpu().numpy()
            total_loss_cum += total_loss.data.cpu().numpy()
            cnt += 1

            # softmax and host transfer are done once per batch instead of once
            # per sample; source and target are handled independently so that
            # differing batch sizes cannot cause out-of-range indexing
            prob_src = F.softmax(y_src_pred, dim=1).data.cpu().numpy()
            prob_tar = F.softmax(y_tar_pred, dim=1).data.cpu().numpy()
            y_src_preds.extend(prob_src[:, 1, ...])
            y_tar_preds.extend(prob_tar[:, 1, ...])
            ys_src.extend(y_src[:, 0, ...].cpu().numpy())
            ys_tar.extend(y_tar[:, 0, ...].cpu().numpy())

        # compute interesting metrics
        y_src_preds = np.asarray(y_src_preds)
        y_tar_preds = np.asarray(y_tar_preds)
        ys_src = np.asarray(ys_src)
        ys_tar = np.asarray(ys_tar)
        j_src = jaccard(ys_src, y_src_preds)
        # target predictions must be scored against the target labels
        j_tar = jaccard(ys_tar, y_tar_preds)
        a_src, ba_src, p_src, r_src, f_src = accuracy_metrics(
            ys_src, y_src_preds)
        a_tar, ba_tar, p_tar, r_tar, f_tar = accuracy_metrics(
            ys_tar, y_tar_preds)

        # don't forget to compute the average and print it
        loss_seg_avg = loss_seg_cum / cnt
        loss_rec_avg = loss_rec_cum / cnt
        total_loss_avg = total_loss_cum / cnt
        print('[%s] Epoch %5d - Loss seg: %.6f - Loss rec: %.6f - Loss: %.6f' %
              (datetime.datetime.now(), epoch, loss_seg_avg, loss_rec_avg,
               total_loss_avg))

        # log everything
        if writer is not None:

            # always log scalars; values and tags are listed in the same order
            scalar_values = [
                loss_seg_avg, loss_rec_avg, total_loss_avg, j_src, a_src,
                ba_src, p_src, r_src, f_src, j_tar, a_tar, ba_tar, p_tar,
                r_tar, f_tar
            ]
            scalar_tags = [
                'loss-seg', 'loss-rec', 'total-loss', 'src/jaccard',
                'src/accuracy', 'src/balanced-accuracy', 'src/precision',
                'src/recall', 'src/f-score', 'tar/jaccard', 'tar/accuracy',
                'tar/balanced-accuracy', 'tar/precision', 'tar/recall',
                'tar/f-score'
            ]
            log_scalars(scalar_values, ['test/' + s for s in scalar_tags],
                        writer,
                        epoch=epoch)

            # log images of the last batch if necessary
            if write_images:
                # keep only the channel with index 1 of the softmax output
                y_src_pred = F.softmax(y_src_pred, dim=1)[:, 1:2, ...].data
                y_tar_pred = F.softmax(y_tar_pred, dim=1)[:, 1:2, ...].data
                image_tags = [
                    'src/x', 'src/x-pred', 'src/y', 'src/y-pred', 'tar/x',
                    'tar/x-pred', 'tar/y', 'tar/y-pred'
                ]
                log_images_3d([
                    x_src, x_src_pred.data, y_src, y_src_pred, x_tar,
                    x_tar_pred.data, y_tar, y_tar_pred
                ], ['test/' + s for s in image_tags],
                              writer,
                              epoch=epoch)

        return total_loss_avg
예제 #3
0
파일: wnet.py 프로젝트: JorisRoels/ynet
    def train_epoch(self,
                    loader_src,
                    loader_tar_ul,
                    loader_tar_l,
                    optimizer,
                    epoch,
                    augmenter=None,
                    print_stats=1,
                    writer=None,
                    write_images=False,
                    device=0):
        """
        Trains the network for one epoch.

        Which losses are optimized depends on self.train_mode:
        RECONSTRUCTION uses the reconstruction and input domain-confusion
        losses, SEGMENTATION uses the segmentation and output domain-confusion
        losses, and any other value trains on all of them jointly.

        :param loader_src: source dataloader (labeled)
        :param loader_tar_ul: target dataloader (unlabeled)
        :param loader_tar_l: target dataloader (labeled), or None if no labeled
                             target data is available
        :param optimizer: optimizer for the loss function
        :param epoch: current epoch (used for printing and logging)
        :param augmenter: data augmenter forwarded to augment_samples
        :param print_stats: running averages are printed every print_stats
                            iterations
        :param writer: summary writer; nothing is logged when None
        :param write_images: if truthy, images of the last processed batch are
                             logged as well
        :param device: device forwarded to module_to_device / tensor_to_device
        :return: average total training loss over the epoch (Python float)
        """
        # perform training on GPU/CPU
        module_to_device(self, device)
        self.train()

        use_tar_l = loader_tar_l is not None

        # running sums of the losses, kept as plain Python floats
        loss_seg_src_cum = 0.0
        loss_seg_tar_cum = 0.0
        loss_rec_src_cum = 0.0
        loss_rec_tar_cum = 0.0
        loss_dc_x_cum = 0.0
        loss_dc_y_cum = 0.0
        total_loss_cum = 0.0
        cnt = 0

        # zip dataloaders: iteration stops as soon as the shortest one is exhausted
        if use_tar_l:
            dl = zip(loader_src, loader_tar_ul, loader_tar_l)
        else:
            dl = zip(loader_src, loader_tar_ul)

        # read-only stand-in for losses that are not computed in the current mode
        zero_loss = torch.Tensor([0])

        # start epoch
        time_start = datetime.datetime.now()
        for i, data in enumerate(dl):

            # transfer to suitable device
            data_src = tensor_to_device(data[0], device)
            x_tar_ul = tensor_to_device(data[1], device)
            if use_tar_l:
                data_tar_l = tensor_to_device(data[2], device)

            # augment if necessary (source, unlabeled target, labeled target)
            x_src, y_src = augment_samples((data_src[0], data_src[1]),
                                           augmenter=augmenter)
            # the unlabeled batch is passed twice to fill the label slot,
            # the second output is discarded
            x_tar_ul, _ = augment_samples((x_tar_ul, x_tar_ul),
                                          augmenter=augmenter)
            if use_tar_l:
                x_tar_l, y_tar_l = augment_samples(
                    (data_tar_l[0], data_tar_l[1]), augmenter=augmenter)
                y_tar_l = get_labels(y_tar_l, coi=self.coi, dtype=int)
            y_src = get_labels(y_src, coi=self.coi, dtype=int)
            x_tar_ul = x_tar_ul.float()

            # zero the gradient buffers
            self.zero_grad()

            # get domain labels for domain confusion:
            # 0 for the source samples, 1 for the unlabeled target samples
            n_src = x_src.size(0)
            n_dom = n_src + x_tar_ul.size(0)
            dom_labels_x = tensor_to_device(torch.zeros(n_dom), device).long()
            dom_labels_x[n_src:] = 1
            dom_labels_y = tensor_to_device(torch.zeros(n_dom), device).long()
            dom_labels_y[n_src:] = 1

            # check train mode and compute loss
            loss_seg_src = zero_loss
            loss_seg_tar = zero_loss
            loss_rec_src = zero_loss
            loss_rec_tar = zero_loss
            loss_dc_x = zero_loss
            loss_dc_y = zero_loss
            if self.train_mode == RECONSTRUCTION:
                x_src_rec, x_src_rec_dom = self.forward_rec(x_src)
                x_tar_ul_rec, x_tar_ul_rec_dom = self.forward_rec(x_tar_ul)
                loss_rec_src = self.rec_loss(x_src_rec, x_src)
                # (prediction, target) order, consistent with the source call
                loss_rec_tar = self.rec_loss(x_tar_ul_rec, x_tar_ul)
                loss_dc_x = self.dc_loss(
                    torch.cat((x_src_rec_dom, x_tar_ul_rec_dom), dim=0),
                    dom_labels_x)
                total_loss = loss_rec_src + loss_rec_tar + self.lambda_dc * loss_dc_x
            elif self.train_mode == SEGMENTATION:
                # switch between reconstructed and original inputs
                if np.random.rand() < self.p:
                    y_src_pred, y_src_pred_dom = self.forward_seg(x_src)
                else:
                    x_src_rec, _ = self.forward_rec(x_src)
                    y_src_pred, y_src_pred_dom = self.forward_seg(x_src_rec)
                    # segmentations of reconstructed inputs get domain label 1
                    dom_labels_y[:n_src] = 1
                if np.random.rand() < self.p:
                    y_tar_ul_pred, y_tar_ul_pred_dom = self.forward_seg(
                        x_tar_ul)
                else:
                    x_tar_ul_rec, _ = self.forward_rec(x_tar_ul)
                    y_tar_ul_pred, y_tar_ul_pred_dom = self.forward_seg(
                        x_tar_ul_rec)
                    dom_labels_y[n_src:] = 1
                loss_seg_src = self.seg_loss(y_src_pred, y_src[:, 0, ...])
                loss_dc_y = self.dc_loss(
                    torch.cat((y_src_pred_dom, y_tar_ul_pred_dom), dim=0),
                    dom_labels_y)
                total_loss = loss_seg_src + self.lambda_dc * loss_dc_y
                if use_tar_l:
                    y_tar_l_pred, _ = self.forward_seg(x_tar_l)
                    loss_seg_tar = self.seg_loss(y_tar_l_pred,
                                                 y_tar_l[:, 0, ...])
                    total_loss = total_loss + loss_seg_tar
            else:
                # joint training: reconstruction, segmentation and both
                # domain-confusion losses
                x_src_rec, x_src_rec_dom = self.forward_rec(x_src)
                if np.random.rand() < self.p:
                    y_src_pred, y_src_pred_dom = self.forward_seg(x_src)
                else:
                    y_src_pred, y_src_pred_dom = self.forward_seg(x_src_rec)
                    dom_labels_y[:n_src] = 1
                x_tar_ul_rec, x_tar_ul_rec_dom = self.forward_rec(x_tar_ul)
                if np.random.rand() < self.p:
                    y_tar_ul_pred, y_tar_ul_pred_dom = self.forward_seg(
                        x_tar_ul)
                else:
                    y_tar_ul_pred, y_tar_ul_pred_dom = self.forward_seg(
                        x_tar_ul_rec)
                    dom_labels_y[n_src:] = 1
                loss_rec_src = self.rec_loss(x_src_rec, x_src)
                # (prediction, target) order, consistent with the source call
                loss_rec_tar = self.rec_loss(x_tar_ul_rec, x_tar_ul)
                loss_seg_src = self.seg_loss(y_src_pred, y_src[:, 0, ...])
                loss_dc_x = self.dc_loss(
                    torch.cat((x_src_rec_dom, x_tar_ul_rec_dom), dim=0),
                    dom_labels_x)
                loss_dc_y = self.dc_loss(
                    torch.cat((y_src_pred_dom, y_tar_ul_pred_dom), dim=0),
                    dom_labels_y)
                total_loss = loss_seg_src + self.lambda_rec * (loss_rec_src + loss_rec_tar) + \
                             self.lambda_dc * (loss_dc_x + loss_dc_y)
                if use_tar_l:
                    # only the second output of the full forward pass is used
                    _, y_tar_l_pred, _, _ = self(x_tar_l)
                    loss_seg_tar = self.seg_loss(y_tar_l_pred,
                                                 y_tar_l[:, 0, ...])
                    total_loss = total_loss + loss_seg_tar

            # .item() yields Python floats, so the running sums stay scalars
            # even for the 1-element stand-in tensor (converting that one with
            # .numpy() would turn the sums into shape-(1,) arrays)
            loss_seg_src_cum += loss_seg_src.item()
            loss_seg_tar_cum += loss_seg_tar.item()
            loss_rec_src_cum += loss_rec_src.item()
            loss_rec_tar_cum += loss_rec_tar.item()
            loss_dc_x_cum += loss_dc_x.item()
            loss_dc_y_cum += loss_dc_y.item()
            total_loss_cum += total_loss.item()
            cnt += 1

            # backward prop
            total_loss.backward()

            # apply one step in the optimization
            optimizer.step()

            # print statistics if necessary
            if i % print_stats == 0:
                print(
                    '[%s] Epoch %5d - Iteration %5d/%5d - Loss seg src: %.6f - Loss seg tar: %.6f - Loss rec src: %.6f - Loss rec tar: %.6f - Loss DCX: %.6f - Loss DCY: %.6f - Loss: %.6f'
                    % (datetime.datetime.now(), epoch, i,
                       len(loader_src.dataset) / loader_src.batch_size,
                       loss_seg_src_cum / cnt, loss_seg_tar_cum / cnt,
                       loss_rec_src_cum / cnt, loss_rec_tar_cum / cnt,
                       loss_dc_x_cum / cnt, loss_dc_y_cum / cnt,
                       total_loss_cum / cnt))

        # keep track of time
        runtime = datetime.datetime.now() - time_start
        seconds = runtime.total_seconds()
        hours = seconds // 3600
        minutes = (seconds - hours * 3600) // 60
        seconds = seconds - hours * 3600 - minutes * 60
        print_frm(
            'Epoch %5d - Runtime for training: %d hours, %d minutes, %f seconds'
            % (epoch, hours, minutes, seconds))

        # don't forget to compute the average and print it
        loss_seg_src_avg = loss_seg_src_cum / cnt
        loss_seg_tar_avg = loss_seg_tar_cum / cnt
        loss_rec_src_avg = loss_rec_src_cum / cnt
        loss_rec_tar_avg = loss_rec_tar_cum / cnt
        loss_dc_x_avg = loss_dc_x_cum / cnt
        loss_dc_y_avg = loss_dc_y_cum / cnt
        total_loss_avg = total_loss_cum / cnt
        print(
            '[%s] Training Epoch %4d - Loss seg src: %.6f - Loss seg tar: %.6f - Loss rec src: %.6f - Loss rec tar: %.6f - Loss DCX: %.6f - Loss DCY: %.6f - Loss: %.6f'
            % (datetime.datetime.now(), epoch, loss_seg_src_avg,
               loss_seg_tar_avg, loss_rec_src_avg, loss_rec_tar_avg,
               loss_dc_x_avg, loss_dc_y_avg, total_loss_avg))

        # log everything
        if writer is not None:

            # always log scalars (only those relevant for the train mode)
            if self.train_mode == RECONSTRUCTION:
                log_scalars(
                    [loss_rec_src_avg, loss_rec_tar_avg, loss_dc_x_avg], [
                        'train/' + s
                        for s in ['loss-rec-src', 'loss-rec-tar', 'loss-dc-x']
                    ],
                    writer,
                    epoch=epoch)
            elif self.train_mode == SEGMENTATION:
                log_scalars(
                    [loss_seg_src_avg, loss_seg_tar_avg, loss_dc_y_avg], [
                        'train/' + s
                        for s in ['loss-seg-src', 'loss-seg-tar', 'loss-dc-y']
                    ],
                    writer,
                    epoch=epoch)
            else:
                log_scalars([
                    loss_seg_src_avg, loss_seg_tar_avg, loss_rec_src_avg,
                    loss_rec_tar_avg, loss_dc_x_avg, loss_dc_y_avg
                ], [
                    'train/' + s for s in [
                        'loss-seg-src', 'loss-seg-tar', 'loss-rec-src',
                        'loss-rec-tar', 'loss-dc-x', 'loss-dc-y'
                    ]
                ],
                            writer,
                            epoch=epoch)
            log_scalars([total_loss_avg],
                        ['train/' + s for s in ['total-loss']],
                        writer,
                        epoch=epoch)

            # log images of the last batch if necessary
            if write_images:
                log_images_2d([x_src.data], ['train/' + s for s in ['src/x']],
                              writer,
                              epoch=epoch)
                if self.train_mode == RECONSTRUCTION:
                    log_images_2d(
                        [x_src_rec.data, x_tar_ul.data, x_tar_ul_rec.data], [
                            'train/' + s
                            for s in ['src/x-rec', 'tar/x-ul', 'tar/x-ul-rec']
                        ],
                        writer,
                        epoch=epoch)
                elif self.train_mode == SEGMENTATION:
                    # keep only the channel with index 1 of the softmax output
                    y_src_pred = F.softmax(y_src_pred, dim=1)[:,
                                                              1:2, :, :].data
                    log_images_2d(
                        [y_src.data, y_src_pred],
                        ['train/' + s for s in ['src/y', 'src/y-pred']],
                        writer,
                        epoch=epoch)
                    if use_tar_l:
                        y_tar_l_pred = F.softmax(y_tar_l_pred,
                                                 dim=1)[:, 1:2, :, :].data
                        log_images_2d([x_tar_l.data, y_tar_l, y_tar_l_pred], [
                            'train/' + s
                            for s in ['tar/x-l', 'tar/y-l', 'tar/y-l-pred']
                        ],
                                      writer,
                                      epoch=epoch)
                else:
                    y_src_pred = F.softmax(y_src_pred, dim=1)[:,
                                                              1:2, :, :].data
                    log_images_2d([
                        x_src_rec.data, y_src.data, y_src_pred, x_tar_ul.data,
                        x_tar_ul_rec.data
                    ], [
                        'train/' + s for s in [
                            'src/x-rec', 'src/y', 'src/y-pred', 'tar/x-ul',
                            'tar/x-ul-rec'
                        ]
                    ],
                                  writer,
                                  epoch=epoch)
                    if use_tar_l:
                        y_tar_l_pred = F.softmax(y_tar_l_pred,
                                                 dim=1)[:, 1:2, :, :].data
                        log_images_2d([x_tar_l.data, y_tar_l, y_tar_l_pred], [
                            'train/' + s
                            for s in ['tar/x-l', 'tar/y-l', 'tar/y-l-pred']
                        ],
                                      writer,
                                      epoch=epoch)

        return total_loss_avg
예제 #4
0
    def train_epoch_semi_supervised(self,
                                    loader_src,
                                    loader_tar_ul,
                                    loader_tar_l,
                                    loss_seg_fn,
                                    loss_rec_fn,
                                    optimizer,
                                    epoch,
                                    augmenter_src=None,
                                    augmenter_tar=None,
                                    print_stats=1,
                                    writer=None,
                                    write_images=False,
                                    device=0):
        """
        Trains the network for one epoch with a segmentation loss on the
        labeled source and labeled target data, and a reconstruction loss on
        the source and unlabeled target data.

        :param loader_src: source dataloader (labeled)
        :param loader_tar_ul: target dataloader (unlabeled)
        :param loader_tar_l: target dataloader (labeled)
        :param loss_seg_fn: segmentation loss function
        :param loss_rec_fn: reconstruction loss function
        :param optimizer: optimizer for the loss function
        :param epoch: current epoch (used for printing and logging)
        :param augmenter_src: source data augmenter
        :param augmenter_tar: target data augmenter (applied to the labeled
                              target data)
        :param print_stats: the losses of the current batch are printed every
                            print_stats iterations
        :param writer: summary writer; nothing is logged when None
        :param write_images: if truthy, images of the last processed batch are
                             logged as well
        :param device: device forwarded to module_to_device / tensor_to_device
        :return: average total training loss over the epoch
        """
        # perform training on GPU/CPU
        module_to_device(self, device)
        self.train()

        # keep track of the average loss during the epoch
        loss_seg_cum = 0.0
        loss_rec_cum = 0.0
        total_loss_cum = 0.0
        cnt = 0

        # start epoch; zip stops as soon as the shortest loader is exhausted
        for i, data in enumerate(zip(loader_src, loader_tar_ul, loader_tar_l)):

            # transfer to suitable device
            data_src = tensor_to_device(data[0], device)
            x_tar_ul = tensor_to_device(data[1], device)
            data_tar_l = tensor_to_device(data[2], device)

            # augment if necessary (the unlabeled target batch is not augmented)
            x_src, y_src = augment_samples(data_src, augmenter=augmenter_src)
            x_tar_l, y_tar_l = augment_samples(data_tar_l,
                                               augmenter=augmenter_tar)
            y_src = get_labels(y_src, coi=self.coi, dtype=int)
            y_tar_l = get_labels(y_tar_l, coi=self.coi, dtype=int)
            x_tar_ul = x_tar_ul.float()

            # zero the gradient buffers
            self.zero_grad()

            # forward prop; the reconstruction is read from the attribute that
            # is picked up right after each forward pass
            y_src_pred = self(x_src)
            x_src_pred = self.reconstruction_outputs
            # the segmentation of the unlabeled batch is not used, the forward
            # pass is only needed for its reconstruction
            self(x_tar_ul)
            x_tar_ul_pred = self.reconstruction_outputs
            y_tar_l_pred = self(x_tar_l)
            x_tar_l_pred = self.reconstruction_outputs

            # compute loss
            loss_seg = 0.5 * (loss_seg_fn(y_src_pred, y_src) +
                              loss_seg_fn(y_tar_l_pred, y_tar_l))
            loss_rec = 0.5 * (loss_rec_fn(x_src_pred, x_src) +
                              loss_rec_fn(x_tar_ul_pred, x_tar_ul))
            total_loss = loss_seg + self.lambda_rec * loss_rec
            loss_seg_cum += loss_seg.data.cpu().numpy()
            loss_rec_cum += loss_rec.data.cpu().numpy()
            total_loss_cum += total_loss.data.cpu().numpy()
            cnt += 1

            # backward prop
            total_loss.backward()

            # apply one step in the optimization
            optimizer.step()

            # print statistics if necessary
            if i % print_stats == 0:
                print(
                    '[%s] Epoch %5d - Iteration %5d/%5d - Loss seg: %.6f - Loss rec: %.6f - Loss: %.6f'
                    % (datetime.datetime.now(), epoch, i,
                       len(loader_src.dataset) / loader_src.batch_size,
                       loss_seg, loss_rec, total_loss))

        # don't forget to compute the average and print it
        loss_seg_avg = loss_seg_cum / cnt
        loss_rec_avg = loss_rec_cum / cnt
        total_loss_avg = total_loss_cum / cnt
        print('[%s] Epoch %5d - Loss seg: %.6f - Loss rec: %.6f - Loss: %.6f' %
              (datetime.datetime.now(), epoch, loss_seg_avg, loss_rec_avg,
               total_loss_avg))

        # log everything
        if writer is not None:

            # always log scalars; values and tags are listed in the same order
            log_scalars(
                [loss_seg_avg, loss_rec_avg, total_loss_avg],
                ['train/' + s for s in ['loss-seg', 'loss-rec', 'total-loss']],
                writer,
                epoch=epoch)

            # log images of the last batch if necessary
            if write_images:
                # keep only the channel with index 1 of the softmax output
                y_src_pred = F.softmax(y_src_pred, dim=1)[:, 1:2, ...].data
                y_tar_l_pred = F.softmax(y_tar_l_pred, dim=1)[:, 1:2, ...].data
                image_tags = [
                    'src/x', 'src/x-pred', 'src/y', 'src/y-pred', 'tar/x',
                    'tar/x-pred', 'tar/y', 'tar/y-pred'
                ]
                log_images_3d([
                    x_src, x_src_pred.data, y_src, y_src_pred, x_tar_l,
                    x_tar_l_pred.data, y_tar_l, y_tar_l_pred
                ], ['train/' + s for s in image_tags],
                              writer,
                              epoch=epoch)

        return total_loss_avg