Example #1
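These snippets are shown without their imports. Below is a minimal sketch of what they appear to depend on, inferred from the names used in the examples; the project-specific pieces (dmClustering, metricCluster, the *Net models, group_label_acc, group_label) live somewhere in the surrounding repository, so their import lines are left commented out as placeholders.

import time
from collections import Counter

import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import SpectralClustering
from sklearn.metrics import normalized_mutual_info_score

# Project-specific components; adjust the module paths to the actual package.
# from dmclustering import dmClustering, metricCluster
# from networks import bioNet, objectNet1d, shapeNet, faceNet1d
# from utils import group_label_acc, group_label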
def testGlass(loader, averaging=False):
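    """Cluster the glass data with dmClustering; return (accuracy, NMI),
    or only the group accuracy when averaging=True."""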
    target, data, nfeat, nsamples = loader.glass()
    inputs = torch.from_numpy(data).unsqueeze(1).float()
    ut = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions, targets:', Counter(target))
    Nclusters = len(ut)
    Nsparse = 40
    batchsize = 40
    Nfactor = 20
    model = dmClustering(bioNet(dim=nfeat),
                         Nfactor=Nfactor,
                         Nclusters=Nclusters)
    F, labels = model.unSupervisedLearner(inputs,
                                          Nepochs=10,
                                          Ninner=1,
                                          sparsity=Nsparse,
                                          bsize=batchsize,
                                          lamda=1)
    if not averaging:
        print('clusters:', ''.join(str(l) for l in labels))
        print(Counter(labels))
        # print('targets:', ''.join(str(t) for t in target))
        nmi = normalized_mutual_info_score(target, labels)
        print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
        acc = group_label_acc(target, labels)
        print('Group accuracy: {:.2f}%'.format(acc * 100))
        return acc, nmi
    else:
        return group_label_acc(target, labels)
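
Each of these tests expects a loader object whose dataset method returns the tuple (target, data, nfeat, nsamples). The stub below is purely hypothetical (random data, made-up sizes) and only illustrates the interface testGlass assumes; it will not yield meaningful clusters.

class RandomLoader:
    """Hypothetical loader showing the (target, data, nfeat, nsamples) contract."""

    def glass(self):
        rng = np.random.default_rng(0)
        data = rng.standard_normal((200, 9)).astype(np.float32)  # 200 samples, 9 features
        target = rng.integers(0, 6, size=200)                    # 6 pseudo-classes
        nsamples, nfeat = data.shape
        return target, data, nfeat, nsamples


acc, nmi = testGlass(RandomLoader())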
Example #2
def testCol100(loader):
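    """Cluster the col100 data with dmClustering; print the cluster counts
    and return (accuracy, NMI)."""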
    target, data, nfeat, nsamples = loader.col100()
    inputs = torch.from_numpy(data).unsqueeze(1).float()
    ut = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions, targets:', Counter(target))
    Nclusters = len(ut)
    Nsparse = 20
    batchsize = 50
    Nfactor = 10
    model = dmClustering(objectNet1d(dim=nfeat),
                         Nfactor=Nfactor,
                         Nclusters=Nclusters)
    F, labels = model.unSupervisedLearner(inputs,
                                          Nepochs=10,
                                          Ninner=1,
                                          sparsity=Nsparse,
                                          bsize=batchsize,
                                          lamda=1)
    ctr = Counter(labels)
    print('clusters:', ''.join(str(l) for l in labels))
    print(ctr)
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    print('Number of clusters: {:d} '.format(len(ctr)))
    return acc, nmi
Example #3
def testSpiral(loader, averaging=False):
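    """Cluster the spiral data with dmClustering; return (accuracy, NMI),
    or per-sample predictions (for avgPred's majority vote) when averaging=True."""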
    target, inputs, nfeat, nsamples = loader.spiral()
    if isinstance(inputs, np.ndarray):
        inputs = torch.from_numpy(inputs).unsqueeze(1).float()
        ut = np.unique(target)
    else:
        ut = target.unique()
    print(nsamples, 'samples,', nfeat, 'dimensions, targets:', Counter(target))
    Nclusters = len(ut)
    Nsparse = 3
    batchsize = 10
    model = dmClustering(shapeNet(), Nfactor=0, Nclusters=Nclusters)
    F, labels = model.unSupervisedLearner(inputs,
                                          Nepochs=20,
                                          sparsity=Nsparse,
                                          bsize=batchsize,
                                          lamda=1)
    if not averaging:
        #print(F)
        print('clusters:', ''.join(str(l) for l in labels))
        print(Counter(labels))
        #print('targets:', ''.join(str(t) for t in target))
        nmi = normalized_mutual_info_score(target, labels)
        print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
        acc = group_label_acc(target, labels)
        print('Group accuracy: {:.2f}%'.format(acc * 100))
        return acc, nmi
    else:
        return group_label(target, labels)
Example #4
def testUsps(loader, pretrained=True):
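    """Metric learning on the USPS data (optionally warm-started from a saved
    checkpoint), then cluster with metricCluster; return (accuracy, NMI)."""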
    target, data, nfeat, nsamples = loader.usps()
    inputs = torch.from_numpy(data).unsqueeze(1).float()
    ut = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions, targets:', Counter(target))
    Nclusters = len(ut)
    modelFile = './USPS_metricNet.dat'
    net = faceNet1d(dim=nfeat)
    if pretrained:
        net.load_state_dict(torch.load(modelFile))
    model = dmClustering(net, Nclusters=Nclusters)
    tau = 100000.0
    dfx, _ = model.metricLearner(inputs,
                                 torch.from_numpy(target),
                                 Nepochs=1500,
                                 bsize=1000,
                                 tau=tau)
    torch.save(net.state_dict(), modelFile)
    labels = metricCluster(dfx, ratio=tau / 2)
    ctr = Counter(labels)
    print('clusters:', ''.join(str(l) for l in labels))
    print(ctr)
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    print('Number of clusters: {:d} '.format(len(ctr)))
    return acc, nmi
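
A hedged usage note: testUsps only loads './USPS_metricNet.dat' when pretrained=True and always saves the trained weights back to that file, so on a machine without the checkpoint the first call should use pretrained=False (loader is a placeholder here).

acc, nmi = testUsps(loader, pretrained=False)  # first run: trains and writes USPS_metricNet.dat
acc, nmi = testUsps(loader, pretrained=True)   # later runs: warm-start from the saved weights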
Example #5
def avgPred(ntimes, loader, testcase):
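    """Run testcase ntimes on the spiral data, majority-vote the per-sample
    predictions across rounds, and return (accuracy, NMI)."""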
    target, inputs, nfeat, nsamples = loader.spiral()
    preds = torch.Tensor(ntimes, nsamples)
    for i in range(ntimes):
        print('predicting {:d}/{:d} round'.format(i + 1, ntimes))
        if isinstance(inputs, np.ndarray):
            preds[i, :] = torch.from_numpy(testcase(loader, True))
        else:
            preds[i, :] = testcase(loader, True)
    print('max Voting')
    pred, _ = preds.mode(0)
    nmi = normalized_mutual_info_score(target, pred.numpy())
    acc = group_label_acc(target, pred.numpy())
    return acc, nmi
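
Since avgPred loads the spiral data itself, testSpiral is the natural testcase to pass in. A hedged invocation sketch (loader and the round count are placeholders):

acc, nmi = avgPred(5, loader, testSpiral)
print('voted accuracy: {:.2f}%, NMI: {:.2f}%'.format(acc * 100, nmi * 100))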
Example #6
def testBlobs(tosave=False):
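    """Cluster three Gaussian blobs with dmClustering, compare against sklearn's
    SpectralClustering, and plot the clusterings and learnt subspaces."""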
    inputs, target = datasets.make_blobs(n_samples=600,
                                         cluster_std=[2.0, 1.5, 1.0])
    nsamples, nfeat = inputs.shape
    inputsTensor = torch.from_numpy(inputs).unsqueeze(1).float()
    ut = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions, targets:', Counter(target))
    Nclusters = len(ut)
    Nsparse = 10
    Nfactor = 0
    batchsize = 25
    model = dmClustering(shapeNet(dim=nfeat),
                         Nfactor=Nfactor,
                         Nclusters=Nclusters)
    F, labels = model.unSupervisedLearner(inputsTensor,
                                          Nepochs=6,
                                          Ninner=1,
                                          sparsity=Nsparse,
                                          bsize=batchsize,
                                          lamda=1)

    print('clusters:', ''.join(str(l) for l in labels))
    print(Counter(labels))
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))

    model = SpectralClustering(n_clusters=2).fit(inputs)
    labels_spcl = model.labels_
    # Unnormalised graph Laplacian L = D - S from the spectral-clustering affinity matrix
    ss = torch.from_numpy(model.affinity_matrix_).float()
    degs = ss.sum(1)
    Ds = torch.diag(degs)
    Ls = Ds - ss
    # Tensor.symeig was removed from recent PyTorch; torch.linalg.eigh is the equivalent
    _, v = torch.linalg.eigh(Ls)
    # spectral embedding: eigenvectors of the two smallest eigenvalues
    f_spcl = v[:, :2].numpy()
    fig, ax = plt.subplots(ncols=5, figsize=(20, 3))
    c = ['b', 'r', 'm']
    # Panel 0: ground-truth blobs
    for i, l in enumerate(np.unique(target)):
        mask = (l == target)
        ax[0].scatter(inputs[mask, 0], inputs[mask, 1], marker='.', color=c[i])
    ax[0].set_title('Three Blobs')
    # Panel 1: sklearn spectral-clustering labels
    for i, l in enumerate(np.unique(labels_spcl)):
        mask = (l == labels_spcl)
        ax[1].scatter(inputs[mask, 0], inputs[mask, 1], marker='.', color=c[i])
    ax[1].set_title('Spectral Clustering')
    # Panel 2: spectral embedding coloured by the proposed labels
    for i, l in enumerate(np.unique(labels)):
        mask = (l == labels)
        ax[2].scatter(f_spcl[mask, 0], f_spcl[mask, 1], marker='.', color=c[i])
    ax[2].set_title('Learnt Subspace')
    # Panel 3: proposed clustering in the input space
    for i, l in enumerate(np.unique(labels)):
        mask = (l == labels)
        ax[3].scatter(inputs[mask, 0], inputs[mask, 1], marker='.', color=c[i])
    ax[3].set_title('Proposed Clustering')
    # Panel 4: learnt embedding F coloured by the proposed labels
    F_np = F.detach().numpy()
    for i, l in enumerate(np.unique(labels)):
        mask = (l == labels)
        ax[4].scatter(F_np[mask, 0], F_np[mask, 1], marker='.', color=c[i])
    ax[4].set_title('Learnt Subspace')
    plt.tight_layout()
    if tosave:
        fig.savefig('ClusteringCompare-ThreeBlobs.png')
        return 1
    else:
        plt.show(block=False)
        time.sleep(5)
        return 1