コード例 #1
0
def init_prob_kmeans(model, eval_loader, args):
    """Initialize cluster centers via PCA + k-means over model features.

    Extracts a feature vector for every sample in ``eval_loader``, reduces
    the features to ``args.n_clusters`` dimensions with PCA, clusters them
    with k-means, prints the initial clustering metrics, and returns the
    cluster centers together with the soft assignment probabilities.

    Args:
        model: network whose second output is the feature tensor.
        eval_loader: loader yielding ``(x, _, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple of (k-means cluster centers, soft-assignment probabilities).
    """
    torch.manual_seed(1)
    model = model.to(device)
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 1024))  # assumes a 1024-dim flattened feature — TODO confirm
    for x, _, label, idx in eval_loader:
        x = x.to(device)
        _, feat = model(x)
        flat = feat.view(x.size(0), -1)
        batch_idx = idx.data.cpu().numpy()
        feats[batch_idx, :] = flat.data.cpu().numpy()
        targets[batch_idx] = label.data.cpu().numpy()
    # Reduce dimensionality before clustering, then evaluate the clustering.
    feats = PCA(n_components=args.n_clusters).fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc = cluster_acc(targets, y_pred)
    nmi = nmi_score(targets, y_pred)
    ari = ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats),
                      torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
コード例 #2
0
def test(model, test_loader, args):
    """Evaluate the clustering quality of one model head on ``test_loader``.

    Chooses head1 or head2 according to ``args.head``, uses the argmax of
    that head's output as the predicted cluster, and prints acc/NMI/ARI.
    """
    model.eval()
    preds = np.array([])
    targets = np.array([])
    for x, label, _ in tqdm(test_loader):
        x, label = x.to(device), label.to(device)
        output1, output2, _ = model(x)
        output = output1 if args.head == 'head1' else output2
        _, pred = output.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
    acc = cluster_acc(targets.astype(int), preds.astype(int))
    nmi = nmi_score(targets, preds)
    ari = ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
コード例 #3
0
ファイル: cifar100_DTC.py プロジェクト: tsantosh1098/DTC
def test(model, test_loader, args, epoch='test'):
    """Evaluate the DTC model: cluster accuracy plus per-sample probabilities.

    Computes features for every test sample, converts them to soft cluster
    probabilities against ``model.center``, takes the argmax as the
    prediction, and prints acc/NMI/ARI.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a torch tensor
        of soft assignments indexed by dataset sample index.
    """
    model.eval()
    n_samples = len(test_loader.dataset)
    feats = np.zeros((n_samples, args.n_clusters))
    probs = np.zeros((n_samples, args.n_clusters))
    preds = np.array([])
    targets = np.array([])
    for x, label, idx in tqdm(test_loader):
        x, label = x.to(device), label.to(device)
        _, feat = model(x)
        prob = feat2prob(feat, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        sample_idx = idx.data.cpu().numpy()
        feats[sample_idx, :] = feat.cpu().detach().numpy()
        probs[sample_idx, :] = prob.cpu().detach().numpy()
    acc = cluster_acc(targets.astype(int), preds.astype(int))
    nmi = nmi_score(targets, preds)
    ari = ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    return acc, nmi, ari, torch.from_numpy(probs)
コード例 #4
0
ファイル: omniglot_DTC.py プロジェクト: tsantosh1098/DTC
def test(model, eval_loader, args):
    """Evaluate the DTC model on ``eval_loader`` (omniglot variant).

    Predictions are the argmax of the soft assignment of each feature to
    ``model.center``; results are stored by dataset index so loader order
    does not matter. Prints acc/NMI/ARI.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` with ``probs`` a torch tensor of
        soft assignments.
    """
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    y_pred = np.zeros(n_samples)
    probs = np.zeros((n_samples, args.n_clusters))
    for x, _, label, idx in eval_loader:
        x = x.to(device)
        _, feat = model(x)
        prob = feat2prob(feat, model.center)
        sample_idx = idx.data.cpu().numpy()
        y_pred[sample_idx] = prob.data.cpu().detach().numpy().argmax(1)
        targets[sample_idx] = label.data.cpu().numpy()
        probs[sample_idx, :] = prob.cpu().detach().numpy()
    # Evaluate clustering performance on the accumulated predictions.
    y_pred = y_pred.astype(np.int64)
    acc = cluster_acc(targets, y_pred)
    nmi = nmi_score(targets, y_pred)
    ari = ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    return acc, nmi, ari, torch.from_numpy(probs)
コード例 #5
0
def estimate_k(model, unlabeled_loader, labeled_loaders, args):
    """Estimate the number of clusters K for the unlabeled data (omniglot).

    For each validation alphabet, splits its labeled classes into a "train"
    part (used as anchors) and a "validation" part, sweeps candidate K
    values scoring each with a cluster-validity index and labeled accuracy,
    and derives a per-alphabet best K from the cluster-size histogram. The
    final K is the ceiling of the mean over alphabets; the unlabeled data is
    then clustered with that K and the resulting metrics are printed.

    Args:
        model: network whose second output is the feature tensor.
        unlabeled_loader: loader yielding ``(x, _, label, idx)`` batches.
        labeled_loaders: dict mapping alphabet name -> loader.
        args: namespace providing ``max_cand_k``, ``split_ratio``,
            ``min_max_ratio``; ``num_val_cls`` is set here per alphabet.

    Returns:
        The estimated number of clusters (int-like numpy scalar).
    """
    u_num = len(unlabeled_loader.dataset)
    u_targets = np.zeros(u_num)
    u_feats = np.zeros((u_num, 1024))  # assumes 1024-dim features — TODO confirm
    print('extracting features for unlabeld data')
    for _, (x, _, label, idx) in enumerate(unlabeled_loader):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        u_feats[idx, :] = feat.data.cpu().numpy()
        u_targets[idx] = label.data.cpu().numpy()
    cand_k = np.arange(args.max_cand_k)
    # get acc for labeled data with short listed k
    best_ks = np.zeros(len(omniglot_background_val_alphabets))
    print('extracting features for labeld data')
    for alphabetStr in omniglot_background_val_alphabets:
        labeled_loader = labeled_loaders[alphabetStr]
        args.num_val_cls = labeled_loader.num_classes

        l_num = len(labeled_loader.dataset)
        l_targets = np.zeros(l_num)
        l_feats = np.zeros((l_num, 1024))
        for _, (x, _, label, idx) in enumerate(labeled_loader):
            x = x.to(device)
            _, feat = model(x)
            feat = feat.view(x.size(0), -1)
            idx = idx.data.cpu().numpy()
            l_feats[idx, :] = feat.data.cpu().numpy()
            l_targets[idx] = label.data.cpu().numpy()

        l_classes = set(l_targets)
        num_lt_cls = int(round(len(l_classes) * args.split_ratio))
        # random.sample no longer accepts a set (TypeError since Python 3.11);
        # sort first so the population order is deterministic.
        lt_classes = set(random.sample(sorted(l_classes), num_lt_cls))
        lv_classes = l_classes - lt_classes

        lt_feats = np.empty((0, l_feats.shape[1]))
        lt_targets = np.empty(0)
        for c in lt_classes:
            lt_feats = np.vstack((lt_feats, l_feats[l_targets == c]))
            lt_targets = np.append(lt_targets, l_targets[l_targets == c])

        lv_feats = np.empty((0, l_feats.shape[1]))
        lv_targets = np.empty(0)
        for c in lv_classes:
            lv_feats = np.vstack((lv_feats, l_feats[l_targets == c]))
            lv_targets = np.append(lv_targets, l_targets[l_targets == c])

        cvi_list = np.zeros(len(cand_k))
        acc_list = np.zeros(len(cand_k))
        cat_pred_list = np.zeros([len(cand_k), u_num + l_num])
        print('estimating K ...')
        for i in range(len(cand_k)):
            cvi_list[i], cat_pred_i = labeled_val_fun(
                np.concatenate((lv_feats, u_feats)), lt_feats, lt_targets,
                cand_k[i] + args.num_val_cls)
            cat_pred_list[i, :] = cat_pred_i
            acc_list[i] = cluster_acc(
                lv_targets,
                cat_pred_i[len(lt_targets):len(lt_targets) + len(lv_targets)])
        # Take the LAST index achieving the max of each criterion, then
        # average (ceil) the two indices to pick the candidate.
        idx_cvi = np.max(np.argwhere(cvi_list == np.max(cvi_list)))
        idx_acc = np.max(np.argwhere(acc_list == np.max(acc_list)))
        idx_best = int(math.ceil((idx_cvi + idx_acc) * 1.0 / 2))

        # Best K = number of unlabeled clusters whose size is a sufficiently
        # large fraction of the biggest unlabeled cluster.
        cat_pred = cat_pred_list[idx_best, :]
        cnt_ul = Counter(cat_pred[l_num:].tolist())
        bin_ul = [x[1] for x in sorted(cnt_ul.items())]
        best_k = np.sum(
            np.array(bin_ul) /
            np.max(bin_ul).astype(float) > args.min_max_ratio)
        print('current best K {}'.format(best_k))
        i_alpha = omniglot_background_val_alphabets.index(alphabetStr)
        best_ks[i_alpha] = best_k
    best_k = np.ceil(np.mean(best_ks)).astype(np.int32)
    # Cast to a builtin int: sklearn validates n_clusters strictly.
    kmeans = KMeans(n_clusters=int(best_k))
    u_pred = kmeans.fit_predict(u_feats).astype(np.int32)
    acc, nmi, ari = cluster_acc(u_targets, u_pred), nmi_score(
        u_targets, u_pred), ari_score(u_targets, u_pred)
    print('Final K {}, acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(
        best_k, acc, nmi, ari))
    return best_k
コード例 #6
0
def estimate_k(model, unlabeled_loader, labeled_loader, args):
    """Estimate the number of clusters K for the unlabeled data (cifar variant).

    Splits labeled classes into anchor ("lt") and validation ("lv") subsets,
    sweeps candidate K values scoring each with a cluster-validity index and
    labeled clustering accuracy, and lets ``get_best_k`` pick the running
    best K. Finally clusters the unlabeled features with that K and prints
    the resulting metrics.

    Args:
        model: network whose second output is the feature tensor.
        unlabeled_loader: loader yielding ``(x, label, idx)`` batches.
        labeled_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``max_cand_k`` (must be >= 1, otherwise no
            candidate is evaluated), ``split_ratio`` and ``num_val_cls``.

    Returns:
        The estimated number of clusters.
    """
    u_num = len(unlabeled_loader.dataset)
    u_targets = np.zeros(u_num)
    u_feats = np.zeros((u_num, 512))  # assumes 512-dim features — TODO confirm
    print('extracting features for unlabeld data')
    for _, (x, label, idx) in enumerate(tqdm(unlabeled_loader)):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        u_feats[idx, :] = feat.data.cpu().numpy()
        u_targets[idx] = label.data.cpu().numpy()
    cand_k = np.arange(args.max_cand_k)
    # get acc for labeled data with short listed k
    l_num = len(labeled_loader.dataset)
    l_targets = np.zeros(l_num)
    l_feats = np.zeros((l_num, 512))
    print('extracting features for labeld data')
    for _, (x, label, idx) in enumerate(tqdm(labeled_loader)):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        l_feats[idx, :] = feat.data.cpu().numpy()
        l_targets[idx] = label.data.cpu().numpy()

    l_classes = set(l_targets)
    num_lt_cls = int(round(len(l_classes) * args.split_ratio))
    # random.sample no longer accepts a set (TypeError since Python 3.11);
    # sort first so the population order is deterministic. This randomly
    # picks the anchor classes from all labeled classes.
    lt_classes = set(random.sample(sorted(l_classes), num_lt_cls))
    lv_classes = l_classes - lt_classes

    lt_feats = np.empty((0, l_feats.shape[1]))
    lt_targets = np.empty(0)
    for c in lt_classes:
        lt_feats = np.vstack((lt_feats, l_feats[l_targets == c]))
        lt_targets = np.append(lt_targets, l_targets[l_targets == c])

    lv_feats = np.empty((0, l_feats.shape[1]))
    lv_targets = np.empty(0)
    for c in lv_classes:
        lv_feats = np.vstack((lv_feats, l_feats[l_targets == c]))
        lv_targets = np.append(lv_targets, l_targets[l_targets == c])

    cvi_list = np.zeros(len(cand_k))
    acc_list = np.zeros(len(cand_k))
    cat_pred_list = np.zeros([len(cand_k), u_num + l_num])
    print('estimating K ...')
    for i in range(len(cand_k)):
        cvi_list[i], cat_pred_i = labeled_val_fun(
            np.concatenate((lv_feats, u_feats)), lt_feats, lt_targets,
            cand_k[i] + args.num_val_cls)
        cat_pred_list[i, :] = cat_pred_i
        # Accuracy is measured on the held-out labeled ("lv") slice only.
        acc_list[i] = cluster_acc(
            lv_targets,
            cat_pred_i[len(lt_targets):len(lt_targets) + len(lv_targets)])
        best_k = get_best_k(cvi_list[:i + 1], acc_list[:i + 1],
                            cat_pred_list[:i + 1], l_num)
        print('current best K {}'.format(best_k))
    # Cast to a builtin int: sklearn validates n_clusters strictly.
    kmeans = KMeans(n_clusters=int(best_k))
    u_pred = kmeans.fit_predict(u_feats).astype(np.int32)
    acc, nmi, ari = cluster_acc(u_targets, u_pred), nmi_score(
        u_targets, u_pred), ari_score(u_targets, u_pred)
    print('Final K {}, acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(
        best_k, acc, nmi, ari))
    return best_k