def testGlass(loader, averaging=False):
    """Cluster the glass data with ``dmClustering`` over a ``bioNet`` backbone.

    Args:
        loader: object whose ``glass()`` method returns
            ``(target, data, nfeat, nsamples)``; ``data`` is passed to
            ``torch.from_numpy``.
        averaging: when False, the cluster assignment and both scores are
            printed and ``(acc, nmi)`` is returned. When True, only
            ``group_label_acc(target, labels)`` is returned.

    Returns:
        ``(acc, nmi)`` or the bare ``group_label_acc`` result, see above.

    NOTE(review): with ``averaging=True`` testSpiral returns
    ``group_label(target, labels)`` instead -- confirm that returning the
    accuracy here is intended.
    """
    target, data, nfeat, nsamples = loader.glass()
    features = torch.from_numpy(data).unsqueeze(1).float()
    distinct_targets = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions,targets:', Counter(target))

    # One cluster per distinct ground-truth label.
    clusterer = dmClustering(
        bioNet(dim=nfeat),
        Nfactor=20,
        Nclusters=len(distinct_targets),
    )
    _, labels = clusterer.unSupervisedLearner(
        features,
        Nepochs=10,
        Ninner=1,
        sparsity=40,
        bsize=40,
        lamda=1,
    )

    if averaging:
        return group_label_acc(target, labels)

    print('clusters:', ''.join(map(str, labels)))
    print(Counter(labels))
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    return acc, nmi
def testCol100(loader):
    """Cluster the ``col100`` data with ``dmClustering`` over ``objectNet1d``.

    Args:
        loader: object whose ``col100()`` method returns
            ``(target, data, nfeat, nsamples)``; ``data`` is passed to
            ``torch.from_numpy``.

    Returns:
        ``(acc, nmi)``: the ``group_label_acc`` score and the normalized
        mutual information between ``target`` and the predicted labels.
    """
    target, data, nfeat, nsamples = loader.col100()
    features = torch.from_numpy(data).unsqueeze(1).float()
    distinct_targets = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions,targets:', Counter(target))

    # One cluster per distinct ground-truth label.
    clusterer = dmClustering(
        objectNet1d(dim=nfeat),
        Nfactor=10,
        Nclusters=len(distinct_targets),
    )
    _, labels = clusterer.unSupervisedLearner(
        features,
        Nepochs=10,
        Ninner=1,
        sparsity=20,
        bsize=50,
        lamda=1,
    )

    label_counts = Counter(labels)
    print('clusters:', ''.join(map(str, labels)))
    print(label_counts)
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    # Number of distinct labels actually produced by the clustering.
    print('Number of clusters: {:d} '.format(len(label_counts)))
    return acc, nmi
def testSpiral(loader, averaging=False):
    """Cluster the spiral data with ``dmClustering`` over a ``shapeNet``.

    Args:
        loader: object whose ``spiral()`` method returns
            ``(target, inputs, nfeat, nsamples)``. ``inputs`` may be a numpy
            array (converted to a float tensor here) or something already
            accepted by the learner, in which case ``target.unique()`` is
            used to enumerate the classes.
        averaging: when False, print the assignment and scores and return
            ``(acc, nmi)``. When True, return ``group_label(target, labels)``.

    Returns:
        ``(acc, nmi)`` or the ``group_label`` result, see above.
    """
    target, inputs, nfeat, nsamples = loader.spiral()
    if type(inputs) is not np.ndarray:
        distinct_targets = target.unique()
    else:
        inputs = torch.from_numpy(inputs).unsqueeze(1).float()
        distinct_targets = np.unique(target)
    print(nsamples, ' samples', nfeat, ' dimensions ,targets:', Counter(target))

    # One cluster per distinct ground-truth label.
    clusterer = dmClustering(
        shapeNet(),
        Nfactor=0,
        Nclusters=len(distinct_targets),
    )
    _, labels = clusterer.unSupervisedLearner(
        inputs,
        Nepochs=20,
        sparsity=3,
        bsize=10,
        lamda=1,
    )

    if averaging:
        return group_label(target, labels)

    print('clusters:', ''.join(map(str, labels)))
    print(Counter(labels))
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    return acc, nmi
def testUsps(loader, pretrained=True):
    """Metric-learn on the USPS data, then cluster the learnt distances.

    Args:
        loader: object whose ``usps()`` method returns
            ``(target, data, nfeat, nsamples)``; ``data`` and ``target`` are
            both passed to ``torch.from_numpy``.
        pretrained: when True, the network weights are first loaded from
            ``./USPS_metricNet.dat``.

    Returns:
        ``(acc, nmi)``: the ``group_label_acc`` score and the normalized
        mutual information between ``target`` and the predicted labels.

    Side effects:
        The network state is written back to ``./USPS_metricNet.dat`` after
        ``metricLearner`` returns, regardless of ``pretrained``.
    """
    target, data, nfeat, nsamples = loader.usps()
    features = torch.from_numpy(data).unsqueeze(1).float()
    distinct_targets = np.unique(target)
    print(nsamples, 'samples,', nfeat, 'dimensions,targets:', Counter(target))

    checkpoint = './USPS_metricNet.dat'
    net = faceNet1d(dim=nfeat)
    if pretrained:
        net.load_state_dict(torch.load(checkpoint))

    learner = dmClustering(net, Nclusters=len(distinct_targets))
    tau = 100000.0
    dfx, _ = learner.metricLearner(
        features,
        torch.from_numpy(target),
        Nepochs=1500,
        bsize=1000,
        tau=tau,
    )
    torch.save(net.state_dict(), checkpoint)

    # The clustering ratio is half of the tau handed to the metric learner.
    labels = metricCluster(dfx, ratio=tau / 2)
    label_counts = Counter(labels)
    print('clusters:', ''.join(map(str, labels)))
    print(label_counts)
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))
    print('Number of clusters: {:d} '.format(len(label_counts)))
    return acc, nmi
def avgPred(ntimes, loader, testcase):
    """Run ``testcase`` several times and score the per-sample majority vote.

    Args:
        ntimes: number of rounds to run.
        loader: object whose ``spiral()`` method returns
            ``(target, inputs, nfeat, nsamples)``; it is also forwarded to
            ``testcase``.
        testcase: callable invoked as ``testcase(loader, True)``; its result
            is stored as one row of length ``nsamples``.

    Returns:
        ``(acc, nmi)`` computed between ``target`` and the column-wise mode
        of all rounds.
    """
    target, inputs, nfeat, nsamples = loader.spiral()
    # When the loader hands out numpy data the round result is converted
    # with torch.from_numpy before being stored.
    needs_conversion = type(inputs) is np.ndarray

    preds = torch.Tensor(ntimes, nsamples)
    for round_no in range(1, ntimes + 1):
        print('predicting {:d}/{:d} round'.format(round_no, ntimes))
        outcome = testcase(loader, True)
        if needs_conversion:
            outcome = torch.from_numpy(outcome)
        preds[round_no - 1, :] = outcome

    print('max Voting')
    voted, _ = preds.mode(0)
    voted_np = voted.numpy()
    nmi = normalized_mutual_info_score(target, voted_np)
    acc = group_label_acc(target, voted_np)
    return acc, nmi
def _scatter_by_label(axis, points, labels, colors, title):
    """Scatter the first two columns of ``points``, one colour per label."""
    for idx, value in enumerate(np.unique(labels)):
        mask = (value == labels)
        # Cycle through the palette so extra labels cannot raise IndexError.
        axis.scatter(points[mask, 0], points[mask, 1], marker='.',
                     color=colors[idx % len(colors)])
    axis.set_title(title)


def testBlobs(tosave=False):
    """Compare the proposed clustering with spectral clustering on blobs.

    Generates 600 samples with ``datasets.make_blobs``, clusters them with
    ``dmClustering`` and with ``SpectralClustering``, prints NMI and group
    accuracy for the former, and draws a five-panel comparison figure.

    Args:
        tosave: when True the figure is written to
            ``ClusteringCompare-ThreeBlobs.png``; otherwise it is shown
            without blocking for five seconds.

    Returns:
        Always ``1``.
    """
    inputs, target = datasets.make_blobs(n_samples=600,
                                         cluster_std=[2.0, 1.5, 1.0])
    nsamples, nfeat = inputs.shape
    inputsTensor = torch.from_numpy(inputs).unsqueeze(1).float()
    ut = np.unique(target)
    print(nsamples, ' samples', nfeat, ' dimensions ,targets:', Counter(target))

    # Proposed method: one cluster per distinct ground-truth label.
    model = dmClustering(shapeNet(dim=nfeat), Nfactor=0, Nclusters=len(ut))
    F, labels = model.unSupervisedLearner(inputsTensor, Nepochs=6, Ninner=1,
                                          sparsity=10, bsize=25, lamda=1)
    print('clusters:', ''.join(str(l) for l in labels))
    print(Counter(labels))
    nmi = normalized_mutual_info_score(target, labels)
    print('Normalized mutual information(NMI): {:.2f}%'.format(nmi * 100))
    acc = group_label_acc(target, labels)
    print('Group accuracy: {:.2f}%'.format(acc * 100))

    # Baseline.
    # NOTE(review): n_clusters=2 although three blobs are generated --
    # confirm this is deliberate.
    spectral = SpectralClustering(n_clusters=2).fit(inputs)
    labels_spcl = spectral.labels_

    # Unnormalized graph Laplacian L = D - S of the fitted affinity matrix.
    ss = torch.from_numpy(spectral.affinity_matrix_).float()
    Ls = torch.diag(ss.sum(1)) - ss
    # Tensor.symeig is deprecated and absent from recent PyTorch releases;
    # prefer torch.linalg.eigh and fall back only on old versions.
    # UPLO='U' mirrors symeig's default of reading the upper triangle.
    if hasattr(torch, 'linalg') and hasattr(torch.linalg, 'eigh'):
        _, v = torch.linalg.eigh(Ls, UPLO='U')
    else:
        _, v = Ls.symeig(eigenvectors=True)
    f_spcl = v[:, :2].numpy()

    embedding = F.detach().numpy()

    fig, ax = plt.subplots(ncols=5, figsize=(20, 3))
    c = ['b', 'r', 'm']
    _scatter_by_label(ax[0], inputs, target, c, 'Three Blobs')
    _scatter_by_label(ax[1], inputs, labels_spcl, c, 'Spectral Clustering')
    # NOTE(review): this panel plots the spectral eigenvectors but colours
    # them with the proposed method's labels and reuses the 'Learnt Subspace'
    # title -- kept as in the original, confirm intent.
    _scatter_by_label(ax[2], f_spcl, labels, c, 'Learnt Subspace')
    _scatter_by_label(ax[3], inputs, labels, c, 'Proposed Clustering')
    _scatter_by_label(ax[4], embedding, labels, c, 'Learnt Subspace')
    plt.tight_layout()

    if tosave:
        fig.savefig('ClusteringCompare-ThreeBlobs.png')
    else:
        plt.show(block=False)
        # plt.pause runs the GUI event loop while waiting, so the window is
        # actually drawn; a bare time.sleep would leave it unresponsive.
        plt.pause(5)
    return 1