Example 1
def zsl_acc(semantic_predicted, semantic_gt, opts):
    # zsl_acc calculates zero-shot classification accuracy
    #
    # INPUTS:
    #	semantic_predicted: predicted semantic labels
    # 	semantic_gt: ground truth semantic labels
    # 	opts: other parameters
    #
    # OUTPUT:
    # 	zsl_accuracy: zero-shot classification accuracy (macro-averaged over classes, hit@HITK)
    pre_label = []
    # similarity scores between predicted embeddings and the (L2-normalized)
    # ground-truth class embeddings
    dist = 1 - distCosine(
        semantic_predicted,
        normalizeFeature(semantic_gt.transpose()).transpose())
    y_hit_k = np.zeros((dist.shape[0], opts.HITK))
    for idx in range(0, dist.shape[0]):
        # rank candidate classes by similarity, largest first
        sorted_id = sorted(range(len(dist[idx, :])),
                           key=lambda k: dist[idx, :][k],
                           reverse=True)
        y_hit_k[idx, :] = opts.test_classes_id[sorted_id[0:opts.HITK]]

    true_label = opts.test_labels.tolist()
    true_label = [int(i) for i in true_label]
    # give credit if the true class appears in the top-HITK ranked classes,
    # otherwise fall back to the top-1 prediction
    for idx in range(0, dist.shape[0]):
        if opts.test_labels[idx] in y_hit_k[idx, :]:
            pre_label.append(int(true_label[idx]))
        else:
            pre_label.append(int(y_hit_k[idx, 0]))
    zsl_accuracy = macro_acc(true_label, pre_label)
    return zsl_accuracy * 100, y_hit_k
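The helpers distCosine, normalizeFeature, and macro_acc are not shown in these examples. Below is a minimal sketch of macro_acc, under the assumption that it returns the macro-averaged (per-class mean) accuracy, which matches how it is used in all three examples:

import numpy as np

def macro_acc(true_label, pre_label):
    # assumed behaviour: mean of the per-class accuracies (macro average)
    true_label = np.asarray(true_label)
    pre_label = np.asarray(pre_label)
    per_class = [np.mean(pre_label[true_label == c] == c)
                 for c in np.unique(true_label)]
    return float(np.mean(per_class))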
Example 2
def dtest(te, model, combine_dir, devce, loss_fn=nn.MSELoss()):

    # get the class semantic-embedding matrix (one row per class)
    vec_mat = get_vec_mat()
    pth = combine_dir
    dirs_general = os.listdir(pth)
    vec_m_combine = []  # embedding matrix for the combined (seen + unseen) test classes
    ids_combine = []
    for f in dirs_general:  # one sub-folder per seen/unseen class
        id = int(re.sub(r"\D", "", f))  # class id parsed from the folder name
        ids_combine.append(id)
        idx = id - 1
        vec_m_combine.append(vec_mat[idx])

    vec_m_combine = np.array(vec_m_combine)
    # print("size of vec test mat :", vec_m_combine.shape)  # 49+19=68, 500
    print('test begin:')
    vec_m_combine = vec_m_combine.transpose()  # (num_classes, d) -> (d, num_classes)

    with torch.no_grad():
        model.eval()
        real_label_test = []
        pre_label_test_1 = []
        pre_label_test_2 = []  # hit 2
        pre_label_test_5 = []  # hit 5
        loss_total_test = 0
        for (vx, vy) in te:
            val_vec_y, val_tag_y = vy
            val_vec_y = val_vec_y.to(devce, dtype=torch.float)
            vx = vx.to(devce, dtype=torch.float)
            vy_pred = model(vx)
            vloss = loss_fn(vy_pred, val_vec_y)  # test-set loss
            loss_total_test += vloss.item()
            ### is the prediction correct? ###
            real_label_test.extend(val_tag_y)
            vy_pred_cpu = vy_pred.cpu().detach().numpy()
            vsz = len(val_tag_y)
            vtt = np.dot(vy_pred_cpu,
                         vec_m_combine)  # score classes by dot product
            for n in range(vsz):
                e = heapq.nlargest(5, range(len(vtt[n])),
                                   vtt[n].take)  # indices of the top-5 scoring classes
                vi = 0
                while vi < 5:
                    if ids_combine[e[vi]] == val_tag_y[n]:  # true class found at rank vi
                        break
                    vi += 1
                pre_label_test_1.append(ids_combine[e[0]])
                pre_label_test_2.append(ids_combine[e[0]])
                pre_label_test_5.append(ids_combine[e[0]])
                # credit hit@2 / hit@5 when the true class is within the top 2 / top 5
                if vi <= 1:
                    pre_label_test_2[-1] = val_tag_y[n]
                    pre_label_test_5[-1] = val_tag_y[n]
                elif vi <= 4:
                    pre_label_test_5[-1] = val_tag_y[n]

        acc_test_1 = macro_acc(real_label_test, pre_label_test_1)
        acc_test_2 = macro_acc(real_label_test, pre_label_test_2)
        acc_test_5 = macro_acc(real_label_test, pre_label_test_5)
        print(
            'test macro_acc_1: {:04.2f}% macro_acc_2: {:04.2f}% macro_acc_5: {:04.2f}%'
            .format(acc_test_1 * 100, acc_test_2 * 100, acc_test_5 * 100))
        return acc_test_1, acc_test_2, acc_test_5, loss_total_test
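The class-ranking step inside dtest amounts to a dot-product nearest-neighbour search over the class embeddings, followed by a top-k lookup with heapq. Here is a self-contained sketch of just that step; rank_classes is an illustrative name, not part of the original code:

import heapq
import numpy as np

def rank_classes(pred_vecs, class_mat, class_ids, k=5):
    # pred_vecs: (batch, d) predicted semantic vectors
    # class_mat: (d, num_classes) class embedding matrix (already transposed)
    # class_ids: class ids aligned with the columns of class_mat
    scores = np.dot(pred_vecs, class_mat)  # (batch, num_classes)
    return [[class_ids[i] for i in heapq.nlargest(k, range(len(row)), row.take)]
            for row in scores]  # best-first class ids per sample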
Example 3
def train(core_model, folder_dir_train, folder_dir_eval, loss_fn):
    output_folder = './save_model'
    os.makedirs(output_folder, exist_ok=True)
    # load training data
    tr_img = data_reader(folder_dir_train)
    tr = DataLoader(tr_img, batch_size=batch_size, shuffle=True, num_workers=8)
    optimizer_tag = optim.Adam(core_model.parameters(),
                               lr=lr,
                               weight_decay=wds)
    print('using {} as criterion'.format(loss_fn))
    # load testing data
    te_img = data_reader(folder_dir_eval)  # test
    te = DataLoader(te_img, batch_size=50, num_workers=8)  # test
    # get the class semantic embeddings from the .mat file
    vec_mat = get_vec_mat()
    dirs_seen = os.listdir(folder_dir_train)
    vec_m = []  # embedding matrix for the seen (training) classes
    ids = []  # ids of the 25 seen classes
    for f in dirs_seen:  # one sub-folder per seen class
        id = int(re.sub(r"\D", "", f))  # class id parsed from the folder name
        ids.append(id)
        idx = id - 1
        vec_m.append(vec_mat[idx])

    vec_m = np.array(vec_m)
    print("size of seen semantic embedding:", vec_m.shape)  # (25, 500)
    vec_m = vec_m.transpose()  # (25, 500) -> (500, 25)
    # hit @ 1, 2, 5
    best_acc_te_1, best_acc_te_2, best_acc_te_5 = 0, 0, 0
    writer = SummaryWriter()
    # train and evaluate each epoch; save a checkpoint when a test metric improves, and also every 10 epochs
    for epoch in range(epoch_num):
        core_model.train()
        loss_total = 0
        real_label = []
        pre_label_1 = []
        pre_label_2 = []  # hit 2
        pre_label_5 = []  # hit 5
        print('train begin:')
        for i, (x, y) in enumerate(tr, 1):
            vec_y, tag_y = y  # semantic-vector target and class tag
            x = x.to(GPU, dtype=torch.float)
            vec_y = vec_y.to(GPU, dtype=torch.float)
            core_model.zero_grad()
            y_pred = core_model(x)
            loss = loss_fn(y_pred, vec_y)
            loss.backward()
            optimizer_tag.step()
            ### is the prediction correct? ###
            real_label.extend(tag_y)  # accumulate ground-truth tags for this batch
            sz = len(tag_y)
            y_pred_cpu = y_pred.cpu().detach().numpy()
            tt = np.dot(y_pred_cpu, vec_m)  # score classes by dot product
            for n in range(sz):
                e = heapq.nlargest(5, range(len(tt[n])), tt[n].take)  # top-5 class indices
                ii = 0
                while ii < 5:
                    if ids[e[ii]] == tag_y[n]:  # true class found at rank ii
                        break
                    ii += 1
                pre_label_1.append(ids[e[0]])
                pre_label_2.append(ids[e[0]])
                pre_label_5.append(ids[e[0]])
                # credit hit@2 / hit@5 when the true class is within the top 2 / top 5
                if ii <= 1:
                    pre_label_2[-1] = tag_y[n]
                    pre_label_5[-1] = tag_y[n]
                elif ii <= 4:
                    pre_label_5[-1] = tag_y[n]

            loss_total += loss.item()
        acc_1 = macro_acc(real_label, pre_label_1)  # hit 1
        acc_2 = macro_acc(real_label, pre_label_2)  # hit 2
        acc_5 = macro_acc(real_label, pre_label_5)  # hit 5
        print(
            'Epoch {:4d}/{:4d} total_loss:{:06.5f} macro_acc_1: {:04.2f}% macro_acc_2: {:04.2f}% macro_acc_5: {:04.2f}%'
            .format(epoch, epoch_num, loss_total, acc_1 * 100, acc_2 * 100,
                    acc_5 * 100))
        writer.add_scalars('macro_acc', {
            'hit_1': acc_1,
            'hit_2': acc_2,
            'hit_5': acc_5
        }, epoch)
        writer.add_scalar('loss', loss_total, epoch)

        # testing Generalized ZSL
        combine_dir = "./ZSL_DATA/test_2/combine"
        acc_test_1, acc_test_2, acc_test_5, loss_total_test = DeVise_test_GZSL.dtest(
            te, core_model, combine_dir, GPU, loss_fn)
        writer.add_scalars('test macro_acc', {
            'hit_1': acc_test_1,
            'hit_2': acc_test_2,
            'hit_5': acc_test_5
        }, epoch)
        writer.add_scalar('test loss', loss_total_test, epoch)
        if epoch > 5 and (acc_test_1 > best_acc_te_1
                          or acc_test_2 > best_acc_te_2
                          or acc_test_5 > best_acc_te_5):
            ans = "_"
            if acc_test_1 > best_acc_te_1:
                best_acc_te_1 = acc_test_1
                ans += "hit1_"
                ans += str(acc_test_1 * 100)[:5]
                ans += "%"
            if acc_test_2 > best_acc_te_2:
                best_acc_te_2 = acc_test_2
                ans += "hit2_"
                ans += str(acc_test_2 * 100)[:5]
                ans += "%"
            if acc_test_5 > best_acc_te_5:
                best_acc_te_5 = acc_test_5
                ans += "hit5_"
                ans += str(acc_test_5 * 100)[:5]
                ans += "%"
            # './save_model'
            torch.save(
                core_model.state_dict(),
                os.path.join(
                    output_folder,
                    str(round(time.time())) + 'epoch' + str(epoch) + ans +
                    '.pkl'))

        # otherwise save a checkpoint every 10 epochs
        elif (epoch > 0 and epoch % 10 == 0):
            torch.save(
                core_model.state_dict(),
                os.path.join(
                    output_folder,
                    str(round(time.time())) + 'epoch' + str(epoch) + '.pkl'))

    writer.close()
    print("best_acc_te_1:", best_acc_te_1)
    print("best_acc_te_2:", best_acc_te_2)
    print("best_acc_te_5:", best_acc_te_5)