Ejemplo n.º 1
0
Archivo: LM.py Proyecto: lovygit/LMRC
    train_root = './data/tiny-imagenet-200_1/training/'
    test_root = './data/tiny-imagenet-200_1/testing/'

    use_cuda = torch.cuda.is_available()
    class_index = [i for i in range(0, TOTAL_CLASS_NUM)]  

    print('==> Building model..')
    # net = Simple_CNN_Multi_Head_LMRC(output_dim=output_dim, normalize=True)
    net = ResNet18_LMRC(outputDim=output_dim, normalize=True)

    for i in range(0, TOTAL_CLASS_NUM, CLASS_NUM_IN_BATCH):  

        print("==> Current Class: ", class_index[i:i+CLASS_NUM_IN_BATCH])

        trainLoader, train_classes = load_ImageNet200_online([train_root],
                                                          category_indexs=class_index[i:i + CLASS_NUM_IN_BATCH],
                                                          train=True, batchSize=batch_size, data_pool_root=None)
        testLoader, test_classes = load_ImageNet200_online([test_root],
                                                        category_indexs=class_index[:i + CLASS_NUM_IN_BATCH],
                                                        train=False, batchSize=batch_size, shuffle=False)

        print("train classes:", train_classes)
        print("test classes:", test_classes)

        # assign label vector
        label_set, label_dict, new_label = label_allotter(label_set, train_classes, label_dict, output_dim)
        print("label set:", label_set)
        print("label dict keys:", label_dict.keys())
        print("new classes:", new_label)

        # # add head
Ejemplo n.º 2
0
Archivo: LM.py Proyecto: lovygit/LMRC
def train(model, head_index, epoch, lr, output_dim, train_loader, test_loader, label_dict,
          modelPath, checkPoint, useCuda=True, adjustLR=False, earlyStop=False, tolearnce=4):
    """Train head ``head_index`` of ``model`` against label vectors with ``CosineLoss``.

    Each epoch runs one pass of SGD over ``train_loader`` and then evaluates the
    model on ``test_loader`` as well as on freshly built loaders for the new
    classes and, when ``i != 0``, the old classes. The model is checkpointed
    with ``saveModel`` whenever the overall accuracy improves.

    NOTE(review): the per-epoch new/old evaluation reads the module-level names
    ``test_root``, ``class_index``, ``i``, ``CLASS_NUM_IN_BATCH`` and
    ``batch_size``; the calling script must define them before calling
    ``train``.

    Args:
        model: network invoked as ``model(x, head_index)``.
        head_index: index of the head that is being trained.
        epoch: number of training epochs.
        lr: learning rate given to SGD (and to ``ImgaeNet200_adjust_lr``).
        output_dim: forwarded to ``CosineLoss`` and ``labels2Vec``.
        train_loader: iterable of ``(x, target)`` batches; must support ``len``.
        test_loader: loader forwarded to ``inference`` for the overall accuracy.
        label_dict: forwarded to ``labels2Vec`` and ``inference``.
        modelPath: forwarded to ``saveModel``.
        checkPoint: the running loss is printed every ``checkPoint`` batches
            and on the last batch of an epoch.
        useCuda: move the model and batches to the GPU when true.
        adjustLR: call ``ImgaeNet200_adjust_lr`` at the start of every epoch.
        earlyStop: stop once ``tolearnce`` consecutive epochs score below the
            best accuracy seen so far.
        tolearnce: (sic) early-stopping patience, in epochs.

    Returns:
        The trained model, both on early stop and on normal completion.
    """
    tolerance_cnt = 0
    step = 0
    best_acc, best_new_acc, best_old_acc = 0, 0, 0

    if useCuda:
        model = model.cuda()

    criterion = CosineLoss(output_dim)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    # Use the loader that was passed in, not a same-named global of the caller.
    num_batches = len(train_loader)

    for epoch_index in range(1, epoch + 1):

        sum_loss = 0
        model.train()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, epoch_index)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            # get label vector
            y_vec_train_np = labels2Vec(target, label_dict, output_dim)
            y_vec_train = torch.from_numpy(y_vec_train_np)

            if useCuda:  # use GPU
                x, y_vec_train = x.cuda(), y_vec_train.cuda()
            x, y_vec_train = Variable(x), Variable(y_vec_train)

            # only calculate new loss
            out = model(x, head_index)
            loss = criterion(out, y_vec_train)
            # ``loss.data[0]`` raises on 0-dim losses; prefer ``item()`` when
            # the installed PyTorch provides it and fall back otherwise.
            sum_loss += loss.item() if hasattr(loss, "item") else loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx + 1) == num_batches:
                print('==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f},'.
                      format(epoch_index, batch_idx + 1, step, sum_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, label_dict, useCuda=useCuda, k=5)

        # observe old and new class acc
        new_testLoader, new_test_classes = load_ImageNet200_online(
            [test_root],
            category_indexs=class_index[i:i + CLASS_NUM_IN_BATCH],
            batchSize=batch_size, train=False)
        print("new test classes")
        new_acc = inference(model, new_testLoader, label_dict, useCuda=useCuda, k=5)

        old_acc = 0
        if i != 0:
            old_testLoader, old_test_classes = load_ImageNet200_online(
                [test_root],
                category_indexs=class_index[:i],
                batchSize=batch_size, train=False)
            print("old test classes")
            old_acc = inference(model, old_testLoader, label_dict, useCuda=useCuda, k=5)

        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                # Track the per-split accuracies together with the best overall
                # accuracy so the final summary is meaningful in this mode too.
                best_acc, best_new_acc, best_old_acc = acc, new_acc, old_acc
                tolerance_cnt = 0
                saveModel(model, epoch_index, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                # NOTE(review): this saves the *current* weights under
                # ``best_acc``; confirm against ``saveModel`` that it does not
                # overwrite the better checkpoint.
                saveModel(model, epoch_index, best_acc, modelPath)
                return model
        elif best_acc < acc:
            saveModel(model, epoch_index, acc, modelPath)
            best_acc, best_new_acc, best_old_acc = acc, new_acc, old_acc

    print("best acc:", best_acc)
    print("best new acc:", best_new_acc)
    print("best old acc:", best_old_acc)
    return model
Ejemplo n.º 3
0
def train(model,
          head_index,
          lamda,
          epoch,
          lr,
          train_loader,
          test_loader,
          T,
          modelPath,
          checkPoint,
          useCuda=True,
          adjustLR=False,
          earlyStop=False,
          tolearnce=4):
    """Train head ``head_index`` of ``model`` while distilling from its old heads.

    A frozen deep copy of the incoming model provides target outputs for every
    head except the last one. The per-batch loss is
    ``CrossEntropy(new head) + lamda * sum(MultiClassCrossEntropy(old heads))``.
    After each epoch the model is evaluated with ``inference`` and checkpointed
    with ``saveModel`` when the overall accuracy improves.

    NOTE(review): the function reads the module-level names ``test_root``,
    ``class_index``, ``i``, ``CLASS_NUM_IN_BATCH`` and ``batch_size``; the
    calling script must define them before calling ``train``.

    Args:
        model: network exposing ``head_list``, ``get_middle_output`` and
            ``get_output``; its copy must also expose ``freeze_weight``.
        head_index: index of the head trained with the cross-entropy loss.
        lamda: weight applied to the summed distillation loss.
        epoch: number of training epochs.
        lr: learning rate given to Adam (and to ``ImgaeNet200_adjust_lr``).
        train_loader: iterable of ``(x, target)`` batches; must support ``len``.
        test_loader: loader forwarded to ``inference`` for the overall accuracy.
        T: forwarded to ``MultiClassCrossEntropy`` (presumably the
            distillation temperature - TODO confirm).
        modelPath: forwarded to ``saveModel``.
        checkPoint: the running losses are printed every ``checkPoint`` batches
            and on the last batch of an epoch.
        useCuda: move the models and batches to the GPU when true.
        adjustLR: call ``ImgaeNet200_adjust_lr`` at the start of every epoch.
        earlyStop: stop once ``tolearnce`` consecutive epochs score below the
            best accuracy seen so far.
        tolearnce: (sic) early-stopping patience, in epochs.

    Returns:
        The trained model, both on early stop and on normal completion.
    """

    def _scalar(value):
        # ``value.data[0]`` raises on 0-dim tensors; prefer ``item()`` when the
        # installed PyTorch provides it and fall back otherwise.
        return value.item() if hasattr(value, "item") else value.data[0]

    tolerance_cnt = 0
    step = 0
    best_acc = 0

    old_model = copy.deepcopy(model)  # snapshot that supplies distillation targets

    if useCuda:
        model = model.cuda()
        old_model = old_model.cuda()

    criterion = nn.CrossEntropyLoss()
    # Optimise the model that was passed in, not a same-named global network.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Use the loader that was passed in, not a same-named global of the caller.
    num_batches = len(train_loader)

    for epoch_index in range(1, epoch + 1):

        sum_loss = 0
        old_sum_loss = 0
        new_sum_loss = 0

        model.train()
        old_model.eval()
        old_model.freeze_weight()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, epoch_index)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            if useCuda:  # use GPU
                x, target = x.cuda(), target.cuda()
            test_x = Variable(x, volatile=True)
            x, target = Variable(x), Variable(target)

            # get response from old heads
            old_mid_out = old_model.get_middle_output(test_x)
            old_outputs = []
            for head_idx in range(len(model.head_list) - 1):
                old_output = old_model.get_output(old_mid_out, head_idx)
                # Re-wrap the raw values so the targets carry no graph and stay
                # on whatever device the model runs on (no forced ``.cuda()``).
                old_outputs.append(Variable(old_output.data))

            # distilling loss
            zero = torch.zeros(1)
            if useCuda:
                zero = zero.cuda()
            old_loss = Variable(zero)
            new_mid_out = model.get_middle_output(x)
            for idx, old_output in enumerate(old_outputs):
                out = model.get_output(new_mid_out, idx)
                old_loss = old_loss + MultiClassCrossEntropy(out, old_output, T=T)

            # calculate new loss
            out = model.get_output(new_mid_out, head_index)
            # transform the class labels to indices local to this head
            target = target - CLASS_NUM_IN_BATCH * head_index
            new_loss = criterion(out, target)

            loss = new_loss + lamda * old_loss

            sum_loss += _scalar(loss)
            old_sum_loss += _scalar(old_loss)
            new_sum_loss += _scalar(new_loss)

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx + 1) == num_batches:
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f},'
                    ' new loss: {:.6f}, old loss: {:.6f}'.format(
                        epoch_index, batch_idx + 1, step,
                        sum_loss / (batch_idx + 1),
                        new_sum_loss / (batch_idx + 1),
                        old_sum_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, useCuda=useCuda, k=5)

        # observe new and old classes acc
        new_testLoader, new_test_classes = load_ImageNet200_online(
            [test_root],
            category_indexs=class_index[i:i + CLASS_NUM_IN_BATCH],
            batchSize=batch_size,
            train=False)
        print("new test classes")
        new_acc = inference(model, new_testLoader, useCuda=useCuda, k=5)

        old_acc = 0  # stays 0 while there are no old classes yet
        if i != 0:
            old_testLoader, old_test_classes = load_ImageNet200_online(
                [test_root],
                category_indexs=class_index[:i],
                batchSize=batch_size,
                train=False)
            print("old test classes")
            old_acc = inference(model, old_testLoader, useCuda=useCuda, k=5)

        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch_index, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                # NOTE(review): this saves the *current* weights under
                # ``best_acc``; confirm against ``saveModel`` that it does not
                # overwrite the better checkpoint.
                saveModel(model, epoch_index, best_acc, modelPath)
                return model
        elif best_acc < acc:
            saveModel(model, epoch_index, acc, modelPath)
            best_acc = acc

    print("best acc:", best_acc)
    return model