Example #1
def validate(args, epoch, model, loader, criterion, logger):
    steps = len(loader)
    local_loss = AverageMeter()
    local_acc = AverageMeter()
    local_recall = AverageMeter()
    aver_loss = AverageMeter()
    aver_acc = AverageMeter()
    aver_recall = AverageMeter()

    model.eval()
    if args.verbose:
        logger.info("Validating")

    with torch.no_grad():
        for i, (images, targets) in enumerate(loader, start=1):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                targets = targets.cuda(args.gpu, non_blocking=True)

            outputs = model(images)
            if args.multi:
                outputs = torch.sigmoid(outputs)
            loss = criterion(outputs, targets)

            if args.multi:
                precision, recall = calculate_metrics(
                    outputs.detach().cpu().numpy(),
                    targets.detach().cpu().numpy(), args.threshold)
            else:
                precision = accuracy(outputs, targets)[0].item()
                recall = precision

            local_loss.update(loss.item(), images.size(0))
            local_acc.update(precision, images.size(0))
            local_recall.update(recall, images.size(0))

            if args.distributed:
                running_metrics = torch.FloatTensor(
                    [loss.item(), precision, recall]).cuda(args.gpu)
                running_metrics /= args.world_size
                dist.all_reduce(running_metrics, op=dist.ReduceOp.SUM)
                aver_loss.update(running_metrics[0].item())
                aver_acc.update(running_metrics[1].item())
                aver_recall.update(running_metrics[2].item())
            else:
                aver_loss.update(loss.item(), images.size(0))
                aver_acc.update(precision, images.size(0))
                aver_recall.update(recall, images.size(0))

            if args.verbose and i % args.log_interval == 0:
                logger.info("Epoch: [{}] [{}]/[{}]({:.2%}) "
                            "Loss: {:.4f} / {:.4f} / {:.4f} "
                            "Acc: {:.2f} / {:.2f} / {:.2f} "
                            "Recall: {:.2f} / {:.2f} / {:.2f}".format(
                                epoch, i, steps, i / steps, loss.item(),
                                local_loss.avg, aver_loss.avg, precision,
                                local_acc.avg, aver_acc.avg, recall,
                                local_recall.avg, aver_recall.avg))

    return aver_loss.avg, aver_acc.avg, aver_recall.avg
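Every example in this collection leans on the same `AverageMeter` utility for running statistics. A minimal sketch, assuming only the interface the call sites above exercise (`val`/`avg`/`sum`/`count` attributes, `update(val, n)`, `reset()`, and the optional name/format arguments used by the `helper.AverageMeter('Time', ':6.3f')` variants):

class AverageMeter:
    """Tracks the latest value and a running per-sample average."""

    def __init__(self, name="", fmt=":f"):
        # name and fmt mirror helper.AverageMeter('Time', ':6.3f');
        # both are optional for the bare AverageMeter() call sites.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # n is typically the batch size, so avg is a per-sample average
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count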
Example #2
def test(args, model, device, test_loader):
    if test_loader.dataset.train:
        print("test on validation set\r\n")
    else:
        print("test on test set\r\n")

    # validate
    model.eval()
    # test_loss = 0
    # correct = 0
    # num_samples = 0
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            # `criterion` is assumed to be a module-level loss (e.g. nn.CrossEntropyLoss())
            test_loss = criterion(outputs, labels).item()
            # pred = outputs.argmax(dim=1, keepdim=True)
            # correct += pred.eq(labels.view_as(pred)).sum().item()
            # # _, pred = torch.max(outputs, 1)
            # # correct += (pred == labels).sum().item()
            # num_samples += pred.size(0)
            prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
            losses.update(test_loss, labels.size(0))
            top1.update(prec1[0], labels.size(0))
            top5.update(prec5[0], labels.size(0))

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: Prec@1:{}/{} ({:.2f}%) Prec@5:{}/{} ({:.2f}%)\n'
        .format(losses.avg, top1.sum // 100, top1.count, top1.avg,
                top5.sum // 100, top5.count, top5.avg))
    return top1.avg, top5.avg
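The `accuracy` helper called throughout is the standard ImageNet top-k routine. A minimal sketch, assuming the usual contract (a list of percent-scaled, shape-[1] tensors, one per requested k, which is why the call sites index `prec1[0]`):

import torch

def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) over a batch, one value per k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the top-maxk classes, transposed to [maxk, batch]
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res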
Example #3
def validate(val_loader, model, criterion):
    model.eval()
    all_acc = []
    all_acc_top5 = []
    all_loss = []

    if args.model == "Elastic_InceptionV3":
        for ix in range((num_outputs - 1)):
            all_loss.append(AverageMeter())
            all_acc.append(AverageMeter())
            all_acc_top5.append(AverageMeter())
    else:
        for ix in range(num_outputs):
            all_loss.append(AverageMeter())
            all_acc.append(AverageMeter())
            all_acc_top5.append(AverageMeter())

    for i, (input, target) in enumerate(val_loader):
        # `async` is a reserved word on Python 3.7+; non_blocking is its replacement
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        losses = 0

        outputs = model(input_var)
        with torch.no_grad():
            for ix in range(len(outputs)):
                loss = criterion(outputs[ix], target_var)
                all_loss[ix].update(loss.item(), input.size(0))
                losses += loss

                prec1 = accuracy(outputs[ix].data, target)
                all_acc[ix].update(prec1[0].item(), input.size(0))

                # top 5 accuracy
                prec5 = accuracy(outputs[ix].data, target, topk=(5, ))
                all_acc_top5[ix].update(prec5[0].item(), input.size(0))
    accs = []
    ls = []
    accs_top5 = []
    for i, j, k in zip(all_acc, all_loss, all_acc_top5):
        accs.append(float(100 - i.avg))
        ls.append(j.avg)
        accs_top5.append(float(100 - k.avg))
    print("validation top 5 error: ", accs_top5)
    return accs, ls, accs_top5
Example #4
def train(train_loader, model, criterion, optimizer, epoch, args, lth_pruner,
          cur_round, mask_applied):
    batch_time = helper.AverageMeter('Time', ':6.3f')
    data_time = helper.AverageMeter('Data', ':6.3f')
    losses = helper.AverageMeter('Loss', ':.4e')
    top1 = helper.AverageMeter('Acc@1', ':6.2f')
    top5 = helper.AverageMeter('Acc@5', ':6.2f')
    progress = helper.ProgressMeter(
        len(train_loader), [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):

        data_time.update(time.time() - end)
        images, target = images.cuda(), target.cuda()
        output = model(images)

        loss = criterion(output, target)

        acc1, acc5 = helper.accuracy(output, target, topk=(1, 5))
        # use the batch size; images[0].size(0) is the channel count, not the batch
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # If in prune mode, block out gradients:
        # after LTH pruning starts, keep zeroed weights at zero.
        if cur_round > 0 or mask_applied:
            for k, (name, param) in enumerate(model.named_parameters()):
                #if 'weight' in name:
                if name in lth_pruner.mask:
                    weight_copy = param.data.abs().clone()
                    mask = weight_copy.gt(0).float().cuda()
                    param.grad.data.mul_(mask)

        optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            progress.display(i)

    return top1.avg, top5.avg, losses.avg, model
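`lth_pruner.mask` above is taken as given; a minimal sketch of how such a magnitude-pruning mask dictionary might be built (hypothetical `make_lth_mask` helper, not part of the original code):

import torch

def make_lth_mask(model, prune_fraction=0.2):
    # Hypothetical helper: zero out the smallest-magnitude weights per tensor
    # and record a {param_name: 0/1 mask} dict shaped like lth_pruner.mask.
    mask = {}
    for name, param in model.named_parameters():
        if 'weight' not in name:
            continue
        flat = param.detach().abs().flatten()
        k = int(prune_fraction * flat.numel())
        if k == 0:
            continue
        threshold = flat.kthvalue(k).values  # k-th smallest magnitude
        mask[name] = (param.detach().abs() > threshold).float()
        param.data.mul_(mask[name])  # zero the pruned weights in place
    return mask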
Example #5
def classifier_transfer(high_fid_data, batch_size, epoch, option=1):
    """
    transfer learning:
    1. reuse seq2seq and FNN weights and train both of them
    2. reuse seq2seq and FNN weights and train FNN weights
    3. reuse seq2seq weights, reinitialize FNN weights and train FNN only
    other: return ErrorMessage
    """
    print("------------------in transfer----------------------")
    sess = tf.compat.v1.Session()
    saver = tf.compat.v1.train.import_meta_graph("pretrain.meta")
    saver.restore(sess, tf.train.latest_checkpoint("./"))

    graph = tf.compat.v1.get_default_graph()
    x = graph.get_tensor_by_name("x:0")
    y = graph.get_tensor_by_name("y:0")
    y_ = graph.get_tensor_by_name("y_:0")

    loss = graph.get_tensor_by_name("loss:0")
    train = tf.compat.v1.get_collection("optimizer")[0]
    new_train = tf.compat.v1.get_collection("optimizer")[1]
    if option == 1:
        optimizer = train
    elif option == 2:
        optimizer = new_train
    elif option == 3:
        optimizer = new_train
        var = tf.compat.v1.global_variables()
        var_to_init = [
            val for val in var if ("fnn/W" in val.name or "fnn/b" in val.name)
        ]
        epoch *= 3
        sess.run(tf.compat.v1.variables_initializer(var_to_init))
    else:
        print("option not available, please assign 1 or 2 or 3 to option")
        return

    for i in range(epoch):
        for _ in range(int(high_fid_data.train_n / batch_size)):
            d = high_fid_data.train_next_batch(batch_size)
            sess.run(optimizer,
                     feed_dict={
                         x: d[0],
                         y_: threeclasses(d[1][:, None])
                     })
        d = high_fid_data.test()
        err = sess.run(loss,
                       feed_dict={
                           x: d[0],
                           y_: threeclasses(d[1][:, None])
                       })

        y_pred = sess.run(y, feed_dict={x: d[0]})
        acc = accuracy(threeclasses(d[1][:, None]), y_pred)

        print("Epoch %d, %f, test acc: %f" % (i, err, acc))
    return acc
Example #6
def validate(val_loader, model, criterion, print_freq, colorization=False):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        if args.mode == 'pretrain':
            target = target.to(device, dtype=torch.float32)
            input = input.to(device, dtype=torch.float32)
        else:
            target = target.cuda()
            input = input.cuda()

        if colorization:
            input = transforms.Resize(500)(input)
            target = transforms.Resize(500)(target)
            input = input.repeat(1, 3, 1, 1)
        if args.pretrain_task == 'colorization':
            input = transforms.Resize(500)(input)

        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            losses.update(loss.item(), input.size(0))
            if not colorization:
                prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
                # index [0]: accuracy returns shape-[1] tensors, which cannot
                # be formatted with '{:.3f}' directly
                top1.update(prec1[0].item(), input.size(0))
                top5.update(prec5[0].item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
    if not colorization:
        print('Test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  i, len(val_loader), batch_time=batch_time, loss=losses,
                  top1=top1, top5=top5))
    else:
        print('Test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                  i, len(val_loader), batch_time=batch_time, loss=losses))
    return losses.avg, top1.avg, top5.avg
Example #7
def train(train_loader, model, criterion, optimizer, epoch, print_freq):
    device = xm.xla_device()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.to(device)  #.cuda(non_blocking=True)
        input = input.to(device)  #.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))  # was prec1[0], a copy-paste bug

        # compute gradient and do SGD step; on TPU, xm.optimizer_step(optimizer)
        # is the usual replacement for the plain optimizer.step() below
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))
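As the comment in the update step above notes, a torch_xla loop normally routes the update through `xm.optimizer_step`, which all-reduces gradients across replicas and marks the XLA step. A minimal sketch of one step under that assumption (hypothetical `xla_train_step` wrapper):

import torch_xla.core.xla_model as xm

def xla_train_step(model, images, target, criterion, optimizer):
    # One TPU training step, mirroring the loop body above.
    output = model(images)
    loss = criterion(output, target)
    optimizer.zero_grad()
    loss.backward()
    # barrier=True is only needed when the loader is not a ParallelLoader
    xm.optimizer_step(optimizer, barrier=True)
    return loss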
Example #8
def validate(val_loader, model, criterion, print_freq, epoch):
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    # model.apply(unfreeze_bn)

    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec[0].item(), input.size(0))
            top5.update(prec[1].item(), input.size(0))

            # measure elapsed time

            if i % print_freq == 0:
                print('Test: [{0}][{1}/{2}]\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch,
                          i,
                          len(val_loader),
                          loss=losses,
                          top1=top1,
                          top5=top5))
        # note: this is a validate() loop, so use val_loader here
        n_iter = epoch * len(val_loader) + i + 1

        # if args.record:
        #     writer.add_scalar('Test/Average loss', losses.val,  n_iter)
        #     writer.add_scalar('Test/Accuracy', top1.val,  n_iter)

    model.train()
    # f_loss and f_acc are assumed to be log file handles opened at module scope
    f_loss.write('\n epoch {} test with loss {:.4f} \n'.format(
        epoch, losses.avg))
    f_acc.write('\n epoch {} test with top1 {:.3f} and top5 {:.3f} \n'.format(
        epoch, top1.avg, top5.avg))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1,
                                                                  top5=top5))

    return top1.avg, top5.avg
Example #9
def train(train_loader, model, criterion, optimizer, epoch, print_freq):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            printInfo = ('Epoch: [{0}][{1}/{2}],\t'.format(epoch, i, len(train_loader)) +
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f}),\t'.format(batch_time=batch_time) +
                         'Data {data_time.val:.3f} ({data_time.avg:.3f}),\t'.format(data_time=data_time) +
                         'Loss {loss.val:.4f} ({loss.avg:.4f}),\t'.format(loss=losses) +
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f}),\t'.format(top1=top1) +
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(top5=top5))

            logging.info(printInfo)
Example #10
def validate(val_loader, model, criterion, print_freq):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1,
                                                                  top5=top5))

    return top1.avg, top5.avg
Example #11
def validate(val_loader, model, criterion, args):
    batch_time = helper.AverageMeter('Time', ':6.3f')
    losses = helper.AverageMeter('Loss', ':.4e')
    top1 = helper.AverageMeter('Acc@1', ':6.2f')
    top5 = helper.AverageMeter('Acc@5', ':6.2f')
    progress = helper.ProgressMeter(len(val_loader),
                                    [batch_time, losses, top1, top5],
                                    prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        #print(len(val_loader),args.batch_size)

        for i, (images, target) in enumerate(val_loader):
            images, target = images.cuda(), target.cuda()
            output = model(images)

            loss = criterion(output, target)
            # measure accuracy and record loss
            acc1, acc5 = helper.accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 10 == 0:
                progress.display(i)

        # TODO: this should also be done with the helper.ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg, top5.avg, losses.avg
Example #12
def validatetrain(val_loader, model, criterion):
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()

    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)
        with torch.no_grad():
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec[0].item(), input.size(0))
            top5.update(prec[1].item(), input.size(0))
    model.train()
    print(' *TRAINMODE Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
Example #13
def batch_boost_train(train_loader, model, weak_model, samplewise_criterion,
                      optimizer_1, optimizer_2, epoch, args):

    batch_time = helper.AverageMeter('Time', ':6.3f')
    data_time = helper.AverageMeter('Data', ':6.3f')

    losses_strong = helper.AverageMeter('Loss strong', ':.4e')
    top1_strong = helper.AverageMeter('Acc@1 strong', ':6.2f')

    losses_weak = helper.AverageMeter('Loss weak', ':.4e')
    top1_weak = helper.AverageMeter('Acc@1 weak', ':6.2f')

    progress = helper.ProgressMeter(len(train_loader), [
        batch_time, data_time, losses_strong, top1_strong, losses_weak,
        top1_weak
    ],
                                    prefix="Epoch: [{}]".format(epoch))

    # reduction='none' keeps per-sample losses; the original `reduce='none'`
    # silently meant reduce=True and collapsed them to a scalar
    samplewise_criterion_weak = nn.CrossEntropyLoss(reduction='none')

    # switch to train mode
    model.train()
    weak_model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):

        data_time.update(time.time() - end)
        images, target = images.cuda(), target.cuda()
        output = model(images)

        samplewise_loss_strong = samplewise_criterion(output, target)

        strong_loss = samplewise_loss_strong.mean()
        acc1, acc5 = helper.accuracy(output, target, topk=(1, 5))
        losses_strong.update(strong_loss.item(), images.size(0))
        top1_strong.update(acc1[0], images.size(0))

        # compute gradient and do SGD step
        optimizer_1.zero_grad()
        strong_loss.backward()

        # Use the per-sample loss from the strong model as weights for the weak model.
        # Rescale the sample-wise loss to [0, 1].
        weights = samplewise_loss_strong.clone().detach()
        weights -= weights.min()
        weights /= weights.max().clamp_min(1e-12)  # guard against an all-equal batch

        output_2 = weak_model(images)
        samplewise_loss_weak = samplewise_criterion_weak(output_2, target)

        # Use them as weights for the weak-model loss.
        samplewise_loss_weak = samplewise_loss_weak * weights
        weak_loss = samplewise_loss_weak.mean()
        acc1, acc5 = helper.accuracy(output_2, target, topk=(1, 5))
        losses_weak.update(weak_loss.item(), images.size(0))
        top1_weak.update(acc1[0], images.size(0))

        optimizer_2.zero_grad()
        weak_loss.backward()

        # If in prune mode, block out gradients:
        # after LTH pruning starts, keep zeroed weights at zero.
        for k, (name, param) in enumerate(model.named_parameters()):
            if 'weight' in name and 'bn' not in name:
                weight_copy = param.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                param.grad.data.mul_(mask)

        for k, (name, param) in enumerate(weak_model.named_parameters()):
            if 'weight' in name and 'bn' not in name:
                weight_copy = param.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                param.grad.data.mul_(mask)

        optimizer_1.step()
        optimizer_2.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            progress.display(i)

    return top1_strong.avg, top1_weak.avg, losses_strong.avg, losses_weak.avg, model, weak_model
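The min-max rescaling of the strong model's per-sample loss has one sharp edge that the `clamp_min` guard above addresses: an all-equal batch makes the denominator zero. A small numeric check:

import torch

loss = torch.tensor([0.2, 1.5, 0.9])   # per-sample losses from the strong model
w = loss.clone().detach()
w -= w.min()
w /= w.max()                            # tensor([0.0000, 1.0000, 0.5385])

flat = torch.tensor([0.7, 0.7, 0.7])    # an all-equal batch
w2 = flat - flat.min()                  # all zeros
w2 = w2 / w2.max().clamp_min(1e-12)     # 0/0 would be nan; this stays 0.0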
Example #14
    def meta_fit(self, meta_dataset_generator) -> Learner:
        """
        Args:
            meta_dataset_generator : a DataGenerator object. We can access 
                the meta-train and meta-validation episodes via its attributes.
                Refer to the metadl/data/dataset.py for more details.
        
        Returns:
            MyLearner object : a Learner that stores the meta-learner's 
                learning object. (e.g. a neural network trained on meta-train
                episodes)
        """
        meta_train_dataset = meta_dataset_generator.meta_train_pipeline
        meta_valid_dataset = meta_dataset_generator.meta_valid_pipeline

        meta_train_dataset = meta_train_dataset.batch(1)
        #         meta_valid_dataset = meta_valid_dataset.batch(2)
        logging.info('Starting meta-fit for the proto-net ...')
        self.embedding_fn.eval()
        acc_list = []

        for tasks_batch in meta_train_dataset:
            sup_set = tf.data.Dataset.from_tensor_slices(\
                (tasks_batch[0][1], tasks_batch[0][0]))
            que_set = tf.data.Dataset.from_tensor_slices(\
                (tasks_batch[0][4], tasks_batch[0][3]))

            new_ds = tf.data.Dataset.zip((sup_set, que_set))
            self.count += 1
            for ((spt_labs, spt_img), (qry_labs, qry_img)) in new_ds:

                spt_img = torch.tensor(np.array(spt_img)).permute(
                    0, 3, 1, 2).contiguous().to('cuda', non_blocking=True)
                spt_labs = torch.tensor(np.array(spt_labs)).to(
                    'cuda', non_blocking=True)
                with torch.no_grad():
                    spt_fea = self.embedding_fn(spt_img, True)[0]
                self.classifier.fit(spt_fea, spt_labs)

                qry_img = torch.tensor(np.array(qry_img)).permute(
                    0, 3, 1, 2).contiguous().to('cuda', non_blocking=True)
                qry_labs = torch.tensor(np.array(qry_labs)).to(
                    'cuda', non_blocking=True)
                torch.cuda.empty_cache()

                prob_list = []
                for a_img in qry_img:
                    a_img = a_img.unsqueeze(0)
                    with torch.no_grad():
                        a_img = self.embedding_fn(a_img, True)[0]
                    prob_list.append(self.classifier.predict(a_img))
                probs = torch.cat(prob_list)  # concatenate once, after the loop

            acc_list.append(accuracy(probs.max(dim=1)[1], qry_labs))
            # Backward Propagation
            self.optimizer.zero_grad()
            torch.cuda.empty_cache()
            qry_labs = qry_labs.long()  # already a CUDA tensor; just cast for cross_entropy
            ce_loss = F.cross_entropy(probs, qry_labs)
            torch.cuda.empty_cache()
            ce_loss.backward()
            torch.cuda.empty_cache()
            self.optimizer.step()
            torch.cuda.empty_cache()
            self.lr_schedule.step()
            if self.count % 50 == 0:
                acc, h_ = mean_confidence_interval(acc_list)
                acc = round(acc * 100, 2)
                h_ = round(h_ * 100, 2)
                print("meta_test aver_accuracy:", acc, "h:", h_)
                acc_list = []
            if (self.count > self.meta_iterations):
                break

        return MyLearner(embedding_fn=self.embedding_fn,
                         classifier=self.classifier)
Example #15
def classifier(
    low_fid_data,
    high_fid_data,
    nblock,
    nn_size,
    nn_layer,
    learning_rate,
    batch_size,
    epoch,
    beta,
):

    batch_size = min(high_fid_data.train_n, batch_size)
    outdim = 2
    tf.compat.v1.reset_default_graph()
    sampling_horizon = low_fid_data.sampling_horizon
    if low_fid_data.feature is not None:
        sampling_horizon += low_fid_data.feature[0].size

    x = tf.compat.v1.placeholder(tf.float32, [None, sampling_horizon],
                                 name="x")
    # weights = tf.compat.v1.placeholder(tf.float32, [sampling_horizon], name="weights")
    learn_rate = tf.constant(learning_rate, name="learn_rate")
    print(f"Learning rate: {learn_rate}")

    y = x
    for _ in range(nblock):
        y = attention_block(y, sampling_horizon)
        y = tf.squeeze(y)

    with tf.compat.v1.variable_scope("fnn"):
        W = tf.Variable(tf.random.normal([sampling_horizon, nn_size],
                                         stddev=0.1),
                        name="W")
        b = tf.Variable(tf.random.normal([nn_size], stddev=0.1), name="b")
        y = tf.nn.relu(tf.tensordot(y, W, [[1], [0]]) + b)

        for _ in range(nn_layer - 1):
            W = tf.Variable(tf.random.normal([nn_size, nn_size], stddev=0.1),
                            name="W")
            b = tf.Variable(tf.random.normal([nn_size], stddev=0.1), name="b")
            y = tf.nn.relu(tf.tensordot(y, W, [[1], [0]]) + b)

        W = tf.Variable(tf.random.normal([nn_size, outdim], stddev=0.1),
                        name="W")
        b = tf.Variable(tf.random.normal([], stddev=0.1), name="b")

    y = tf.identity(tf.tensordot(y, W, [[1], [0]]) + b, name="y")

    y_ = tf.compat.v1.placeholder(tf.float32, [None, outdim], name="y_")

    loss = tf.compat.v1.losses.softmax_cross_entropy(y_, y)
    # add L2 regularization
    L2_var = [
        var for var in tf.compat.v1.global_variables() if
        ("fnn/W" in var.name or "fnn/b" in var.name) and "Adam" not in var.name
    ]

    lossL2 = tf.math.add_n([tf.nn.l2_loss(v) for v in L2_var]) * beta

    loss = tf.identity(loss + lossL2, name="loss")
    train = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.compat.v1.add_to_collections("optimizer", train)

    saver = tf.compat.v1.train.Saver()
    sess = tf.compat.v1.Session()
    sess.run(tf.compat.v1.global_variables_initializer())

    for i in range(epoch):
        low_fid_data.mixup("None")
        for _ in range(int(low_fid_data.train_n / batch_size)):
            d = low_fid_data.train_next_batch(batch_size)
            sess.run(train,
                     feed_dict={
                         x: d[0],
                         y_: hypoglycemia(d[1][:, None])
                     })
        d = high_fid_data.test()
        err = sess.run(loss,
                       feed_dict={
                           x: d[0],
                           y_: hypoglycemia(d[1][:, None])
                       })

        y_pred = sess.run(y, feed_dict={x: d[0]})
        acc = accuracy(hypoglycemia(d[1][:, None]), y_pred)

        print("Epoch %d, %f, test acc: %f" % (i, err, acc))
    saver.save(sess, "pretrain")
    return acc, np.hstack((np.argmax(hypoglycemia(d[1][:, None]),
                                     axis=1).reshape((-1, 1)), y_pred))
Example #16
def classifier_transfer(high_fid_data, batch_size, epoch, option=1):
    """
    transfer learning:
    1. reuse attention and FNN weights and train both of them
    2. reuse attention and FNN weights and train FNN weights
    3. reuse attention weights, reinitialize FNN weights and train FNN only
    other: return ErrorMessage
    """
    print("------------------in transfer----------------------")
    sess = tf.compat.v1.Session()
    saver = tf.compat.v1.train.import_meta_graph("pretrain.meta")
    saver.restore(sess, tf.train.latest_checkpoint("./"))

    graph = tf.compat.v1.get_default_graph()
    x = graph.get_tensor_by_name("x:0")
    y = graph.get_tensor_by_name("y:0")
    y_ = graph.get_tensor_by_name("y_:0")
    # weights = graph.get_tensor_by_name("weights:0")
    loss = graph.get_tensor_by_name("loss:0")
    learning_rate = graph.get_tensor_by_name("learn_rate:0")

    if option == 1:
        optimizer = tf.compat.v1.get_collection("optimizer")[0]
    elif option > 1:
        var_to_init = [
            var for var in tf.compat.v1.global_variables()
            if ("fnn/W" in var.name or "fnn/b" in var.name)
            and "Adam" not in var.name
        ]
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate,
                                                     name="newAdam").minimize(
                                                         loss,
                                                         var_list=var_to_init)
        uninitialized_vars = []
        for var in tf.compat.v1.global_variables():
            try:
                sess.run(var)
            except tf.errors.FailedPreconditionError:
                uninitialized_vars.append(var)
        sess.run(tf.compat.v1.variables_initializer(uninitialized_vars))

        if option == 3:
            # reinitialize weights in FNN
            epoch *= 3
            sess.run(tf.compat.v1.variables_initializer(var_to_init))
    else:
        # mirror Example #5: reject anything other than option 1, 2 or 3,
        # which would otherwise leave `optimizer` undefined below
        print("option not available, please assign 1 or 2 or 3 to option")
        return
    for i in range(epoch):
        for _ in range(int(high_fid_data.train_n / batch_size)):
            d = high_fid_data.train_next_batch(batch_size)
            sess.run(optimizer,
                     feed_dict={
                         x: d[0],
                         y_: hypoglycemia(d[1][:, None])
                     })
        d = high_fid_data.test()
        err = sess.run(loss,
                       feed_dict={
                           x: d[0],
                           y_: hypoglycemia(d[1][:, None])
                       })

        y_pred = sess.run(y, feed_dict={x: d[0]})
        acc = accuracy(hypoglycemia(d[1][:, None]), y_pred)

        print("Epoch %d, %f, test acc: %f" % (i, err, acc))
    return acc, y_pred
Example #17
def classifier(
    low_fid_data,
    high_fid_data,
    k_size,
    nblock,
    nn_size,
    nn_layer,
    learning_rate,
    batch_size,
    epoch,
    beta,
):

    tf.compat.v1.reset_default_graph()
    batch_size = min(high_fid_data.train_n, batch_size)
    outdim = 3

    learn_rate = tf.constant(learning_rate, name="learn_rate")
    print(f"Learning rate: {learn_rate}")

    sampling_horizon = low_fid_data.sampling_horizon
    if low_fid_data.feature is not None:
        sampling_horizon += low_fid_data.feature[0].size
    sess = tf.compat.v1.Session()

    x = tf.compat.v1.placeholder(tf.float32, [None, sampling_horizon], name="x")
    alpha = tf.Variable(tf.random.normal([], stddev=0.1))
    p = tf.math.sin(tf.range(float(sampling_horizon)))
    y = x + alpha * p
    # weights = tf.compat.v1.placeholder(tf.float32, [sampling_horizon], name="weights")

    assert k_size < sampling_horizon
    for _ in range(nblock):
        x0 = tf.slice(y, [0, 0], [-1, 1])
        x0s = tf.tile(x0, [1, k_size - 1])
        xx = tf.concat([x0s, y], 1)
        data = tf.reshape(xx, [-1, sampling_horizon + k_size - 1, 1])

        kernel1 = tf.Variable(tf.random.normal([k_size, 1, 1], stddev=0.1))
        kernel2 = tf.Variable(tf.random.normal([k_size, 1, 1], stddev=0.1))
        A = tf.squeeze(
            tf.nn.conv1d(input=data, filters=kernel1, stride=1, padding="VALID")
        )
        B = tf.squeeze(
            tf.nn.conv1d(input=data, filters=kernel2, stride=1, padding="VALID")
        )
        y = tf.math.multiply(A, tf.sigmoid(B)) + y

    # FNN
    with tf.compat.v1.variable_scope("fnn"):
        W = tf.Variable(
            tf.random.normal([sampling_horizon, nn_size], stddev=0.1), name="W"
        )
        b = tf.Variable(tf.random.normal([nn_size], stddev=0.1), name="b")
        y = tf.nn.relu(tf.tensordot(y, W, [[1], [0]]) + b)
        for _ in range(nn_layer - 1):
            W = tf.Variable(tf.random.normal([nn_size, nn_size], stddev=0.1), name="W")
            b = tf.Variable(tf.random.normal([nn_size], stddev=0.1), name="b")
            y = tf.nn.relu(tf.tensordot(y, W, [[1], [0]]) + b)

        W = tf.Variable(tf.random.normal([nn_size, outdim], stddev=0.1), name="W")
        b = tf.Variable(tf.random.normal([], stddev=0.1), name="b")
        y = tf.tensordot(y, W, [[1], [0]]) + b
    y = tf.identity(y, name="y")

    y_ = tf.compat.v1.placeholder(tf.float32, [None, outdim], name="y_")

    loss = tf.compat.v1.losses.softmax_cross_entropy(y_, y)

    # add L2 regularization
    L2_var = [
        var
        for var in tf.compat.v1.global_variables()
        if ("fnn/W" in var.name or "fnn/b" in var.name) and "Adam" not in var.name
    ]

    lossL2 = tf.math.add_n([tf.nn.l2_loss(v) for v in L2_var]) * beta

    loss = tf.identity(loss + lossL2, name="loss")

    train = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    new_train = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(
        loss, var_list=[W, b]
    )
    tf.compat.v1.add_to_collections("optimizer", train)
    tf.compat.v1.add_to_collections("optimizer", new_train)

    saver = tf.compat.v1.train.Saver()
    sess.run(tf.compat.v1.global_variables_initializer())

    for i in range(epoch):
        for _ in range(int(low_fid_data.train_n / batch_size)):
            d = low_fid_data.train_next_batch(batch_size)
            sess.run(train, feed_dict={x: d[0], y_: threeclasses(d[1][:, None])})
        d = high_fid_data.test()
        err = sess.run(loss, feed_dict={x: d[0], y_: threeclasses(d[1][:, None])})

        y_pred = sess.run(y, feed_dict={x: d[0]})
        acc = accuracy(threeclasses(d[1][:, None]), y_pred)

        print("Epoch %d, %f, test acc: %f" % (i, err, acc))
    saver.save(sess, "pretrain")
    return acc
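Examples #17 and #5 are two halves of one workflow: `classifier` pretrains on the low-fidelity data, saves the `pretrain` checkpoint, and registers both optimizers in the "optimizer" collection that `classifier_transfer` later indexes. A hypothetical driver, assuming `low_fid_data` and `high_fid_data` expose the interface used above (`train_n`, `sampling_horizon`, `feature`, `train_next_batch`, `test`) and with made-up hyperparameters:

# Pretrain on low-fidelity data; writes pretrain.meta and the checkpoint files.
acc = classifier(low_fid_data, high_fid_data, k_size=5, nblock=2,
                 nn_size=64, nn_layer=2, learning_rate=1e-3,
                 batch_size=128, epoch=50, beta=1e-4)

# Fine-tune only the FNN head on high-fidelity data (option=2).
acc = classifier_transfer(high_fid_data, batch_size=128, epoch=20, option=2)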
Example #18
trainloader = torch.utils.data.DataLoader(train_data, batch_size=64,
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=64)
validloader = torch.utils.data.DataLoader(validate_data, batch_size=64)

if args.arch == 'vgg':
    input_size = 25088
    model = models.vgg16(pretrained=True)
elif args.arch == 'resnet':
    input_size = 2048
    model = models.resnet50(pretrained=True)  # was models.alexnet, contradicting the branch

for param in model.parameters():
    param.requires_grad = False
# note: a ResNet exposes `model.fc` rather than `model.classifier`, so the
# resnet branch would need that attribute swapped here
model.classifier = nn.Sequential(nn.Linear(input_size, args.hidden_layers),
                                 nn.ReLU(), nn.Dropout(p=0.5),
                                 nn.Linear(args.hidden_layers, 102),
                                 nn.LogSoftmax(dim=1))
print(model)

criterion = nn.NLLLoss()
device = args.gpu
optimizer = optim.Adam(model.classifier.parameters(), args.lr)
loss, accuracy = helper.validate(model, criterion, testloader, device)
print(f"loss: {loss} \n Accuracy: {accuracy}")
epochs = args.epochs
model = helper.train(model, optimizer, criterion, epochs, trainloader,
                     validloader, device)
helper.accuracy(model, testloader, device)
helper.save(model, train_data, args.arch, input_size, args.hidden_layers,
            epochs, args.lr)
Example #19
def main(**kwargs):
    global args

    for arg, v in kwargs.items():
        args.__setattr__(arg, v)

    print(args)

    program_start_time = time.time()
    instanceName = "classification_Accuracy"
    folder_path = os.path.dirname(os.path.abspath(__file__))

    timestamp = datetime.datetime.now()
    ts_str = timestamp.strftime('%Y-%m-%d-%H-%M-%S')

    if args.debug:
        print("[Debug mode]")
        path = folder_path + os.sep + instanceName + os.sep + "Debug-" + args.model + os.sep + ts_str + "_" + args.dataset + "_" + args.wordembedding
    else:
        path = folder_path + os.sep + instanceName + os.sep + args.model + os.sep + ts_str + "_" + args.dataset + "_" + args.wordembedding

    os.makedirs(path)

    args.savedir = path

    global logFile
    logFile = path + os.sep + "log.txt"

    if args.model == "BiLSTMConv":
        Model = models.BiLSTMConv

    # elif args.model == "BiGRU":
    #     Model = models.BiGRU

    # elif args.model == "WordCNN":
    #     Model = models.WordCNN

    # elif args.model == "BiGRUWithTimeDropout":
    #     Model = models.BiGRUWithTimeDropout

    elif args.model == "CNN_Text_Model":
        Model = CNN_Text_Model.CNN_Text
    elif args.model == "VDCNN":
        Model = vcdnn.VDCNN
    else:
        raise NotImplementedError(args.model)

    # process the input data.

    captionStrDict = {"fig_title": args.dataset, "x_label": "epoch"}

    train_iter, test_iter, net = data_preprocess.prepare_data_and_model(
        Model=Model, args=args, using_gpu=True)
    print("args: ", args)

    LOG(str(args), logFile)

    global device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net).cuda()
        cudnn.benchmark = True

    optimizer = optim.Adam(params=net.parameters(), lr=1e-3, weight_decay=1e-4)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=1000,
                                             gamma=.99)
    if device == "cuda":
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    # criterion = nn.CrossEntropyLoss().cuda()

    best_test_acc = 0
    best_test_results = []
    ground_truth = []

    epoch_train_accs = []
    epoch_train_losses = []
    epoch_test_accs = []
    epoch_test_losses = []
    epoch_lrs = []

    for epoch in range(args.epochs):

        epoch_start_time = time.time()

        train_accs = []
        train_losses = []

        for batch in tqdm(train_iter):

            net.train()
            xs = batch.text
            ys = batch.label
            # # ys = ys.squeeze(1)
            # print("ys_train data type: ", type(ys))
            # print("ys_train: ", ys)
            if device == 'cuda':
                ys = ys.cuda(non_blocking=True)
            xs = torch.autograd.Variable(xs)
            ys_var = torch.autograd.Variable(ys)

            logits = net(xs)
            loss = criterion(logits, ys_var)
            # print("loss: ", loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # step the per-iteration LR schedule after the optimizer update
            lr_scheduler.step()
            train_losses.append(loss.item() / int(args.batch_size))
            train_accs.append(accuracy(logits.data, ys))

        train_accs_normal = [i[0].item() for i in train_accs]

        # print("epoch ", epoch, " :  training accumulated accuracy ", np.mean(train_accs_normal))
        LOG("epoch: " + str(epoch), logFile)
        LOG("[TRAIN] accumulated accuracy: " + str(np.mean(train_accs_normal)),
            logFile)

        epoch_train_accs.append(np.mean(train_accs_normal))
        epoch_train_losses.append(np.mean(train_losses))

        test_accs = []
        test_losses = []
        test_predict_results = []

        # best_test_acc is tracked across epochs (initialized before the loop)

        net.eval()

        pred_results = []

        print("running testing.....")
        for batch in tqdm(test_iter):
            xs_test = batch.text
            ys_test = batch.label

            logits_test = net(xs_test)
            test_loss = criterion(logits_test, ys_test)

            test_losses.append(test_loss.item() / int(args.batch_size))
            test_accs.append(accuracy(logits_test.data, ys_test))

            pred_results = pred_results + logits_test.topk(
                1, 1, True, True)[1].t().cpu().numpy().tolist()[0]

            if epoch == (args.epochs - 1):
                ground_truth = ground_truth + ys_test.cpu().numpy().tolist()

        test_accs_normal = [i[0].item() for i in test_accs]

        # print("epoch {} :  testing accumulated accuracy {} %".format(epoch, np.mean(test_accs)))
        print("epoch ", epoch, " :  testing accumulated accuracy ",
              np.mean(test_accs_normal))

        # LOG("epoch: "+ str(epoch), logFile)
        LOG("[TEST] accumulated accuracy: " + str(np.mean(test_accs_normal)),
            logFile)

        if best_test_acc < np.mean(test_accs_normal):
            best_test_acc = np.mean(test_accs_normal)
            best_test_results = pred_results
            torch.save(net.state_dict(),
                       path + os.sep + str(Model.name) + ".pkl")

        epoch_test_accs.append(np.mean(test_accs_normal))
        epoch_test_losses.append(np.mean(test_losses))

        # read the current learning rate directly from the optimizer
        lr = optimizer.param_groups[0]['lr']
        epoch_lrs.append(lr)

        log_stats(path, [np.mean(train_accs_normal)], [np.mean(train_losses)],
                  [np.mean(test_accs_normal)], [np.mean(test_losses)], lr)

        one_epoch_last_time = time.time() - epoch_start_time

        LOG("last time: " + str(one_epoch_last_time), logFile)

    df = pd.DataFrame(data={
        "test_label": best_test_results,
        "ground truth": ground_truth
    })
    df.to_csv(path + os.sep + "test_classification_result.csv",
              sep=',',
              index=True)

    # save the metrics report
    logFile = confusion_matrix(df["test_label"], df["ground truth"], logFile)

    plot_figs(epoch_train_accs, epoch_train_losses, epoch_test_accs,
              epoch_test_losses, args, captionStrDict)
    LOG("============Finish============", logFile)
Example #20
def train(train_loader, model, criterion, optimizer, epoch, print_freq):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    #model.apply(freeze_bn)
    dslen = len(train_loader)
    end = time.time()
    for k, (input, target) in enumerate(train_loader):
        if epoch < args.warm:
            warmup_scheduler.step()

        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec[0].item(), input.size(0))
        top5.update(prec[1].item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if 'mean' in args.weight:
            model.apply(meanweigh)

        n_iter = (epoch) * len(train_loader) + k
        if n_iter == 1:
            os.system('nvidia-smi')
        if n_iter % print_freq == 1 and epoch < args.start_epoch + 5:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'LR: {3:.5f}\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'LossEpoch {loss.avg:.4f}\t'
                  'Prec@1 {top1.avg:.3f}\t'
                  'Prec@5 {top5.avg:.3f}\t'.format(
                      epoch,
                      n_iter,
                      dslen,
                      optimizer.param_groups[0]['lr'],
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5),
                  flush=True)
            batch_time.reset()
            data_time.reset()
            losses.reset()
            top1.reset()
            top5.reset()
            #validatetrain(val_loader, model, criterion)
        elif k == dslen - 1:
            model.apply(inspect_bn)
            print('Epoch: [{0}][{1}/{2}]\t'
                  'LR: {3:.5f}\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'LossEpoch {loss.avg:.4f}\t'
                  'Prec@1 {top1.avg:.3f}\t'
                  'Prec@5 {top5.avg:.3f}\t'.format(
                      epoch,
                      n_iter,
                      dslen,
                      optimizer.param_groups[0]['lr'],
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5),
                  flush=True)
Example #21
def train(train_loader, model, criterion, optimizers, epoch):

    model.train()

    lr = None
    all_acc = []
    all_acc_top5 = []
    all_loss = []

    for ix in range(num_outputs):
        all_loss.append(AverageMeter())
        all_acc.append(AverageMeter())
        all_acc_top5.append(AverageMeter())

    LOG("==> train ", logFile)
    # print("num_outputs: ", num_outputs)

    for i, (input, target) in enumerate(train_loader):
        # print("input: ", input, input.shape)
        # print("target: ", target, target.shape)

        # bp_1
        if args.backpropagation == 1:
            # LOG("enter backpropagation method : " + str(args.backpropagation) +"\n", logFile)

            target = target.cuda(non_blocking=True)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)

            for ix in range(num_outputs):
                outputs = model(input_var)
                #
                optimizers[ix].zero_grad()

                loss = criterion(outputs[ix], target_var)

                loss.backward()

                optimizers[ix].step()

                # optimizer.zero_grad()
                # if ix == (num_outputs - 1):
                #     loss.backward()
                # else:
                #     loss.backward(retain_graph=True)

                # optimizer.step()
                all_loss[ix].update(loss.item(), input.size(0))

                # top 1 accuracy
                prec1 = accuracy(outputs[ix].data, target)
                all_acc[ix].update(prec1[0].item(), input.size(0))

                # top 5 accuracy
                prec5 = accuracy(outputs[ix].data, target, topk=(5, ))
                all_acc_top5[ix].update(prec5[0].item(), input.size(0))

        # elif args.backpropagation == 2:
        #     # LOG("enter backpropagation method : " + str(args.backpropagation) +"\n", logFile)
        #     # bp_2
        #     for ix in range(num_outputs):

        #         target = target.cuda(async=True)
        #         input_var = torch.autograd.Variable(input)
        #         target_var = torch.autograd.Variable(target)
        #         optimizer.zero_grad()
        #         outputs = model(input_var)
        #         loss = criterion(outputs[ix], target_var)
        #         loss.backward()
        #         optimizer.step()

        #         all_loss[ix].update(loss.item(), input.size(0))

        #         # top 1 accuracy
        #         prec1 = accuracy(outputs[ix].data, target)
        #         all_acc[ix].update(prec1[0].data[0].item(), input.size(0))

        #         # top 5 accuracy
        #         prec5 = accuracy(outputs[ix].data, target, topk=(5,))
        #         all_acc_top5[ix].update(prec5[0].data[0].item(), input.size(0))

        # elif args.backpropagation == 3:
        #     # LOG("enter backpropagation method : " + str(args.backpropagation) +"\n", logFile)
        #     # bp_3
        #     target = target.cuda(async=True)
        #     input_var = torch.autograd.Variable(input)
        #     target_var = torch.autograd.Variable(target)

        #     optimizer.zero_grad()
        #     outputs = model(input_var)
        #     losses = 0
        #     for ix in range(len(outputs)):
        #         # print("outputs[ix]: ", outputs[ix])
        #         loss = criterion(outputs[ix], target_var)
        #         losses += loss

        #         all_loss[ix].update(loss.item(), input.size(0))

        #         # top 1 accuracy
        #         prec1 = accuracy(outputs[ix].data, target)
        #         all_acc[ix].update(prec1[0].data[0].item(), input.size(0))

        #         # top 5 accuracy
        #         prec5 = accuracy(outputs[ix].data, target, topk=(5,))
        #         all_acc_top5[ix].update(prec5[0].data[0].item(), input.size(0))

        #     # losses = losses/len(outputs)
        #     losses.backward()
        #     optimizer.step()
        else:
            raise NotImplementedError(args.backpropagation)

    accs = []
    accs_top5 = []
    ls = []
    for i, j, k in zip(all_acc, all_loss, all_acc_top5):
        accs.append(float(100 - i.avg))
        ls.append(j.avg)
        accs_top5.append(float(100 - k.avg))

    # read the current learning rate directly from the last optimizer
    lr = optimizers[-1].param_groups[0]['lr']

    print("train epoch top 5 error: ", accs_top5)
    return accs, ls, lr, accs_top5
Example #22
def train(train_loader, model, criterion, optimizer, epoch, print_freq,
          colorization=False, scheduler=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    global cur_itrs

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        cur_itrs += 1
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(device, dtype=torch.float32)
        input = input.to(device, dtype=torch.float32)

        if colorization:
            input = transforms.Resize(500)(input)
            target = transforms.Resize(500)(target)
            input = input.repeat(1, 3, 1, 1)

        # compute output
        output = model(input)
        loss = criterion(output, target)
        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        if not colorization:
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            # index [0]: accuracy returns shape-[1] tensors, which cannot
            # be formatted with '{:.3f}' directly
            top1.update(prec1[0].item(), input.size(0))
            top5.update(prec5[0].item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler:
            scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            if not colorization:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))
            else:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses))

    # return loss, top1, top5 corresponding to each epoch
    return losses.avg, top1.avg, top5.avg
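A quick shape check for the colorization branch above, assuming a torchvision recent enough to apply `transforms.Resize` to tensors (>= 0.8): the grayscale batch is upsampled, then tiled to three channels so an ImageNet-style backbone accepts it.

import torch
from torchvision import transforms

x = torch.randn(8, 1, 224, 224)  # grayscale batch
x = transforms.Resize(500)(x)    # -> torch.Size([8, 1, 500, 500])
x = x.repeat(1, 3, 1, 1)         # -> torch.Size([8, 3, 500, 500])
print(x.shape)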