Beispiel #1
0
def run():
    """Compare DiSim and Herm clustering on DSBM_PA samples and print ARI scores.

    A random complete meta-graph on ``k = 3`` clusters is drawn once
    (``random_state=28``) and converted into DSBM_PA parameters. For every
    seed, one graph is sampled with ``Herm=False`` and clustered with
    ``disim.cluster``, and a second graph is sampled with ``Herm=True`` and
    clustered with ``herm.cluster``. The score returned by ``evaluate.ari``
    is printed per seed, followed by the mean over all seeds.

    NOTE(review): ``seeds`` is not defined inside this function; it must be
    provided as a module-level name by the surrounding file.
    """
    n = 4000
    k = 3
    p = 0.01
    q = 0.01
    F = DSBM.random_complete(k, η=0.0, random_state=28)
    print(F.shape)
    PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
    # Alternative setup kept for reference: override PA_kwargs['C'] with
    # DSBM_PA.tree(k=k, η=0.0, inner_edges=0.15).

    aris_DSBM_disim = []
    aris_DSBM_herm = []

    for seed in seeds:
        # Non-Hermitian sample, clustered with DiSim.
        GDSBM, comms_DSBM = DSBM_PA.sample(random_state=seed, Herm=False, a=10, **PA_kwargs)
        print([len(comm) for comm in comms_DSBM])
        clusters = disim.cluster(GDSBM, k, k, mode='R')
        print([len(cluster) for cluster in clusters])
        ari = evaluate.ari(comms_DSBM, clusters)
        aris_DSBM_disim.append(ari)
        print('Disim', ari)

        # Hermitian sample drawn with the same seed, clustered with Herm.
        GDSBM, comms_DSBM = DSBM_PA.sample(random_state=seed, Herm=True, a=10, **PA_kwargs)
        clusters = herm.cluster(GDSBM, k, 0.1)
        ari = evaluate.ari(comms_DSBM, clusters)
        print('Herm', ari)
        aris_DSBM_herm.append(ari)

    print('Mean Disim', np.mean(aris_DSBM_disim))
    print('Mean Herm', np.mean(aris_DSBM_herm))
Beispiel #2
0
def spectral_gap():
    """Sample DSBM_PA graphs on a circle meta-graph over a parameter grid.

    Iterates over every combination of noise level, cluster count, cluster
    size and edge probability, builds the circle meta-graph, converts it to
    DSBM_PA parameters and samples one graph per seed. Only the shapes of the
    meta-graph and of each sampled graph are printed; despite the name, no
    spectrum is computed in this version.
    """
    # circle metagraph
    noises = np.linspace(0, 0.5, 15)
    ks = np.array([3, 5, 7])
    ps = np.array([0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055])
    seeds = np.array([6, 28, 496, 8128, 33550336])
    ns = [3000, 5000]  # np.array([1000, 1500, 2000, 2500, 5000])
    for (η, k, n, p) in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.circle(k, η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        for seed in seeds:
            # Seed each repetition so the loop over `seeds` yields distinct,
            # reproducible samples (the seed was previously ignored).
            # NOTE(review): other call sites also pass `a=...`; confirm that
            # DSBM_PA.sample has a usable default for it.
            sample = DSBM_PA.sample(random_state=int(seed), **PA_kwargs)
            # Every other call site in this file unpacks a (graph, communities)
            # pair from DSBM_PA.sample; accept a bare matrix as well in case
            # this snippet targets a variant that returns only the graph.
            GPA = sample[0] if isinstance(sample, tuple) else sample
            print(GPA.shape)
Beispiel #3
0
def run_test(seed, N, k, p, q, η):
    """Cluster one DSBM and one DSBM_PA sample with DiSim and Herm.

    Both graphs are generated from the same circle meta-graph and the same
    ``seed``. Each graph is clustered with ``disim.cluster`` and, after
    ``hermify.to_herm``, with ``herm.cluster``.

    Args:
        seed: Random state passed to both samplers.
        N: Total number of vertices; each cluster gets ``N // k`` vertices.
        k: Number of clusters.
        p, q: Edge-probability parameters forwarded to ``DSBM.sample`` and
            ``convert.DSBM_to_PA``.
        η: Noise parameter of ``DSBM.circle``.

    Returns:
        dict with keys ``{DiSim,Herm}_{DSBM,PA}_{M,A}``, where ``M`` holds
        the value of ``evaluate.misclustered_vertices`` and ``A`` the value
        of ``evaluate.ari`` for that algorithm/model pair.
    """
    n = N // k
    F = DSBM.circle(k, η)
    PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

    GDSBM, comms_DSBM = DSBM.sample(n=n,
                                    k=k,
                                    p=p,
                                    q=q,
                                    F=F,
                                    random_state=seed,
                                    Herm=False)
    GPA, comms_pa = DSBM_PA.sample(a=10, **PA_kwargs, random_state=seed)
    print([len(comm) for comm in comms_DSBM])

    # Clustering calls are kept in their original order in case the
    # algorithms draw from shared random state.
    clustersds_dsbm = disim.cluster(GDSBM, k, k, mode='R')
    clustershm_dsbm = herm.cluster(hermify.to_herm(GDSBM), k, k)
    clustersds_pa = disim.cluster(GPA, k, k, mode='R')
    clustershm_pa = herm.cluster(hermify.to_herm(GPA), k, k)

    ari_dsbm_disim = evaluate.ari(comms_DSBM, clustersds_dsbm)
    ari_dsbm_herm = evaluate.ari(comms_DSBM, clustershm_dsbm)
    ari_pa_disim = evaluate.ari(comms_pa, clustersds_pa)
    ari_pa_herm = evaluate.ari(comms_pa, clustershm_pa)

    miscl_vs_disim_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustersds_dsbm)
    miscl_vs_herm_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustershm_dsbm)
    miscl_vs_disim_pa = evaluate.misclustered_vertices(comms_pa, clustersds_pa)
    miscl_vs_herm_pa = evaluate.misclustered_vertices(comms_pa, clustershm_pa)

    print()

    return {
        'DiSim_DSBM_M': miscl_vs_disim_dsbm,
        'DiSim_PA_M': miscl_vs_disim_pa,
        'Herm_DSBM_M': miscl_vs_herm_dsbm,
        'Herm_PA_M': miscl_vs_herm_pa,
        'DiSim_DSBM_A': ari_dsbm_disim,
        'DiSim_PA_A': ari_pa_disim,
        'Herm_DSBM_A': ari_dsbm_herm,
        'Herm_PA_A': ari_pa_herm,
    }
Beispiel #4
0
def spectral_gap():
    """Plot the leading singular values of the regularised DSBM_PA operator.

    For every combination of noise, cluster count, cluster size and edge
    probability, a random complete meta-graph is drawn and one DSBM_PA graph
    is sampled per seed. For each graph the matrix
    ``Oτ^{-1/2} @ A @ Pτ^{-1/2}`` is formed, where ``Oτ`` / ``Pτ`` are the
    diagonal out- / in-degree matrices shifted by the mean out-degree ``τ``.
    Its largest singular values are scattered (one series per seed) and the
    figure is written to
    ``complete_meta_spectral_gap_k{k}_N{k*n}_noise{η}_p{p}.png``.

    NOTE(review): the meta-graph seed is picked with ``np.random.choice``,
    so the meta-graph is only reproducible if NumPy's global RNG is seeded
    by the caller.
    """
    # Imported once here instead of on every loop iteration.
    import matplotlib.pyplot as plt
    from scipy.sparse import linalg as la

    # complete metagraph
    noises = np.array([0.0, 0.1, 0.2])
    ks = np.array([3, 5, 7])
    ps = np.array([0.0035, 0.0045, 0.005])
    seeds = np.array([6, 28, 496, 8128])
    ns = np.array([1500, 2500, 5000])
    n_singular = 10  # number of singular values computed and plotted
    for (η, k, n, p) in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.random_complete(random_state=np.random.choice(seeds),
                                 k=k,
                                 η=η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        spectra = []
        for seed in seeds:
            # Seed each repetition; previously `seed` was never used, so the
            # loop over `seeds` did not control the samples.
            GPA, _ = DSBM_PA.sample(a=10,
                                    **PA_kwargs,
                                    Herm=False,
                                    random_state=int(seed))
            print(GPA.shape)
            out_degrees = GPA.sum(axis=1)
            in_degrees = GPA.sum(axis=0)
            τ = out_degrees.mean()
            _, m = GPA.shape
            Pτ = scipy.sparse.spdiags(in_degrees + τ, 0, m, m)
            print('computed P')
            # The row sums are reshaped to a single row, the layout passed to
            # spdiags for one diagonal.
            Oτ = scipy.sparse.spdiags(
                np.reshape(out_degrees + τ, (1, -1)), 0, m, m)
            print('computed O')
            tmp1 = Oτ.power(-0.5)
            print('generated tmp1')
            tmp2 = Pτ.power(-0.5)
            print('generated tmp2')
            print(tmp1.shape, GPA.shape, tmp2.shape)
            L = tmp1 @ GPA @ tmp2
            _, Σ, _ = la.svds(L, k=n_singular)
            Σ.sort()
            spectra.append(Σ[::-1])  # descending order

        fig, ax = plt.subplots()
        ranks = np.arange(1, n_singular + 1)
        for Σ in spectra:
            ax.scatter(ranks, Σ, s=1.5)
        fig.savefig(
            f'complete_meta_spectral_gap_k{k}_N{k*n}_noise{η}_p{p}.png')
        # pyplot keeps every figure alive until it is closed; without this
        # one figure leaks per grid point.
        plt.close(fig)
def run_test(seed, N, k, p, q, η, interpolation):
    """Cluster a DSBM_PA graph whose 'C' matrix is interpolated.

    A random complete meta-graph is drawn until the off-diagonal part of the
    converted ``'C'`` matrix has no all-zero row. ``C2`` is the converted
    matrix, ``C1`` is ``C2`` with a zeroed diagonal and rows rescaled to sum
    to one. The graph is sampled with
    ``C = interpolation * C1 + (1 - interpolation) * C2`` and clustered with
    both DiSim and Herm.

    Args:
        seed: Random state for sampling; also the base of the meta-graph
            retry seeds ``seed**itr``.
        N: Total number of vertices; each cluster gets ``N // k`` vertices.
        k: Number of clusters.
        p, q: Edge-probability parameters forwarded to
            ``convert.DSBM_to_PA``.
        η: Noise parameter of ``DSBM.random_complete``.
        interpolation: Weight of ``C1`` in the blend.

    Returns:
        dict with keys ``'interpolation'``, ``'ari_disim'``, ``'ari_herm'``.
    """
    n = N // k
    C1 = np.zeros((k, k))
    itr = 0
    # C1 starts as all zeros, so the loop body runs at least once.
    while any(C1.sum(axis=1) == 0):
        F = DSBM.random_complete(k, η, random_state=seed**itr)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        C2 = PA_kwargs['C']
        C1[:, :] = C2
        np.fill_diagonal(C1, 0)
        # Advance the attempt counter so a rejected meta-graph is re-drawn
        # with a different random state. Previously `itr` stayed 0 and the
        # loop repeated random_state == 1 forever.
        # NOTE(review): for seed in {0, 1} the value seed**itr still stops
        # changing after the first retry; `seed + itr` would avoid that but
        # would change the first draw.
        itr += 1

    C1 = C1 / C1.sum(axis=1).reshape(-1, 1)
    print(C2)
    print(C1)

    PA_kwargs['C'] = interpolation * C1 + (1 - interpolation) * C2

    GPA, comms = DSBM_PA.sample(a=10,
                                **PA_kwargs,
                                Herm=False,
                                random_state=seed)
    GPA_Herm = hermify.to_herm(GPA)

    clusters_disim = disim.cluster(GPA, k, k, mode='R')
    clusters_herm = herm.cluster(GPA_Herm, k, ϵ=-1, RW=True)

    return {
        'interpolation': interpolation,
        'ari_disim': evaluate.ari(comms, clusters_disim),
        'ari_herm': evaluate.ari(comms, clusters_herm),
    }
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the ROCMG experiments on one DSBM and one DSBM_PA graph.

    A random complete meta-graph is drawn, retrying with ``seed + its``
    while ``convert.DSBM_to_PA`` returns the sentinel string ``'total=0'``.
    The converted ``'C'`` matrix is blended with the identity using weight
    ``noise``, and DSBM parameters are derived back via
    ``convert.PA_to_DSBM``. Every score is appended as a single header-less
    row to ``results/{tag}.csv``.

    Args:
        seed: Random state for meta-graph generation and graph sampling.
        noise: Blend weight η of the identity in ``'C'``; written to the
            ``percentage_intra_edges`` column.
        tag: Base name of the CSV file inside ``results/``.
        norm: ``None`` compares DiSim and Herm on both models;
            ``'interpolation'`` runs Herm only, sweeping ``r`` over 1..10 on
            both models. Any other value samples nothing and writes nothing.
    """
    η = noise
    print("Running ROCMG-experiments for seed", seed)
    k = 5
    n = 4000
    its = 0
    p = 0
    q = 0.0045

    def record(model, extra_key, extra_value, ari):
        """Append one result row; `extra_key` is 'algorithm' or 'r'."""
        row = {
            'model': [model],
            'average_edge_probability': [q],
            'percentage_intra_edges': [η],
            extra_key: [extra_value],
            'ari': [ari],
        }
        pd.DataFrame(data=row).to_csv(f"results/{tag}.csv",
                                      mode='a',
                                      header=False)

    def sample_dsbm():
        """Sample the DSBM graph from the back-converted parameters."""
        return DSBM.sample(n=n,
                           k=k,
                           p=dic2['p'],
                           q=dic2['q'],
                           F=F,
                           random_state=seed,
                           Herm=False)

    def sample_pa():
        """Sample the DSBM_PA graph from the blended parameters."""
        return DSBM_PA.sample(random_state=seed,
                              a=dic['c'],
                              Herm=False,
                              **dic)

    dic = 'total=0'
    while dic == 'total=0':
        F = DSBM.random_complete(k=k, η=0, random_state=seed + its)
        dic = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
        its += 1
    dic['C'] = η * np.diag(np.ones(k)) + (1 - η) * dic['C']
    dic2 = convert.PA_to_DSBM(**dic)

    if norm is None:
        A, comms = sample_dsbm()
        print("finished sampling dsbm. moving to clustering.")
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        record('DSBM', 'algorithm', 'DiSim', evaluate.ari(comms, cls))
        print('finished disim-dsbm, moving on to herm-dsbm')
        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        record('DSBM', 'algorithm', 'Herm', evaluate.ari(comms, cls))
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = sample_pa()
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        print("finished disim-pa, moving on to herm-pa")
        record('DSBM_PA', 'algorithm', 'DiSim', ari)
        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        record('DSBM_PA', 'algorithm', 'Herm', evaluate.ari(comms, cls))
        print(f"Concluded experiments for p={p}")
    elif norm == 'interpolation':
        A, comms = sample_dsbm()
        print("finished sampling dsbm. moving to clustering.")
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            record('DSBM', 'r', r, evaluate.ari(comms, cls))
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = sample_pa()
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            # `r` must be forwarded here as in the DSBM sweep above;
            # previously it was omitted, so all ten rows recorded the same
            # clustering under different `r` labels.
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            record('DSBM_PA', 'r', r, evaluate.ari(comms, cls))
        print(f"Concluded experiments for p={p}")
Beispiel #7
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the ROCMG experiments for one seed and one normalisation mode.

    A random complete meta-graph with noise ``noise`` is drawn once. Every
    score is appended as a single header-less row to ``results/{tag}.csv``.

    Args:
        seed: Random state for meta-graph generation and graph sampling.
        noise: Noise parameter η of ``DSBM.random_complete``; written to the
            ``noise`` column.
        tag: Base name of the CSV file inside ``results/``.
        norm: Selects the experiment:
            ``None`` compares DiSim and Herm on DSBM and DSBM_PA graphs for
            several edge probabilities ``p``;
            ``'self-loops'`` sweeps ``τ_self_loops`` on DSBM_PA graphs;
            ``'densify'`` sweeps ``ω`` on DSBM_PA graphs (with ``n = 2000``
            instead of 4000).
            Any other value performs no experiment.
    """
    print("Running ROCMG-experiments for seed", seed)
    k = 5
    n = 2000 if norm == 'densify' else 4000
    F = DSBM.random_complete(k=k, η=noise, random_state=seed)

    def record(row):
        """Append one single-row result dict to the CSV file."""
        pd.DataFrame(data=row).to_csv(f"results/{tag}.csv", mode='a', header=False)

    def model_row(model, p, algorithm, ari):
        """Build a result row for the model comparison."""
        return {
            'model': [model],
            'p': [p],
            'noise': [noise],
            'algorithm': [algorithm],
            'ari': [ari],
        }

    def sweep(column, values, make_kwargs):
        """Cluster DSBM_PA graphs with both algorithms per parameter value.

        `column` names the CSV column holding the swept value and
        `make_kwargs(value)` returns the extra keyword argument passed to
        both clustering routines.
        """
        for p in [0.003, 0.006]:
            q = p
            PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
            A, comms = DSBM_PA.sample(a=PA_kwargs['c'], random_state=seed, Herm=False, **PA_kwargs)
            AH = hermify.to_herm(A)
            for value in values:
                extra = make_kwargs(value)
                print('clustering using disim')
                cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, **extra)
                row = {
                    'p': [p],
                    'noise': [noise],
                    column: [value],
                    'algorithm': ['DiSim'],
                    'ari': [evaluate.ari(comms, cls)],
                }
                record(row)
                print('finished with DiSim, moving to Herm')
                cls = herm.cluster(A=AH, k=k, norm=norm, **extra)
                row['algorithm'] = ['Herm']
                row['ari'] = [evaluate.ari(comms, cls)]
                record(row)
                print('done with herm')

    if norm is None:
        for p in [0.002, 0.004, 0.006, 0.008]:
            q = p
            PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
            A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed, Herm=False)
            print("finished sampling dsbm. moving to clustering.")
            cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
            record(model_row('DSBM', p, 'DiSim', evaluate.ari(comms, cls)))
            print('finished disim-dsbm, moving on to herm-dsbm')
            A = hermify.to_herm(A)
            cls = herm.cluster(A=A, k=k, norm=norm)
            record(model_row('DSBM', p, 'Herm', evaluate.ari(comms, cls)))
            print('finished herm-dsbm, movnig on to disim-pa')

            A, comms = DSBM_PA.sample(random_state=seed, a=PA_kwargs['c'], Herm=False, **PA_kwargs)
            cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
            ari = evaluate.ari(comms, cls)
            print("finished disim-pa, moving on to herm-pa")
            record(model_row('DSBM_PA', p, 'DiSim', ari))
            A = hermify.to_herm(A)
            cls = herm.cluster(A=A, k=k, norm=norm)
            record(model_row('DSBM_PA', p, 'Herm', evaluate.ari(comms, cls)))
            print(f"Concluded experiments for p={p}")
    elif norm == 'self-loops':
        sweep('tau',
              [0.1, 0.4, 0.75, 0.85, 1, 1.15, 1.25, 1.6, 2, 3],
              lambda τ: dict(τ_self_loops=τ))
    elif norm == 'densify':
        sweep('omega',
              np.linspace(0, 0.002, 9),
              lambda ω: dict(ω=ω))
Beispiel #8
0
def eigenvectors():
    """Plot the top three left singular vectors of the DSBM_PA operator.

    For every combination of noise and cluster count (cluster size and edge
    probability are currently fixed to a single value each), a random
    complete meta-graph is drawn and one DSBM_PA graph is sampled per seed.
    The matrix ``Oτ^{-1/2} @ A @ Pτ^{-1/2}`` is formed from the diagonal
    out- / in-degree matrices shifted by the mean out-degree ``τ``, and the
    singular vectors of its three largest singular values are stored. One
    stored result is picked at random; each of its three left singular
    vectors is drawn as a bar chart, with one ``bar`` call per community.
    The figure is saved to
    ``DSBM_PA_thicker_tail_k{k}_N{k*n}_noise{η}.pdf``.

    NOTE(review): the meta-graph seed and the plotted sample are picked with
    ``np.random.choice``, so they are only reproducible if NumPy's global
    RNG is seeded by the caller.
    """
    # Imported once here instead of on every loop iteration.
    import matplotlib.pyplot as plt
    from scipy.sparse import linalg as la

    # complete metagraph
    noises = np.array([0.0, 0.1, 0.2])
    ks = np.array([3, 5, 7])
    ps = np.array([0.0045])  # full grid: [0.0035, 0.0045, 0.005]
    seeds = np.array([6])  # full grid: [6, 28, 496, 8128]
    ns = np.array([2500])  # full grid: [1500, 2500, 5000]
    for (η, k, n, p) in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.random_complete(random_state=np.random.choice(seeds),
                                 k=k,
                                 η=η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        spectra = []
        for seed in seeds:
            # Seed each repetition; previously `seed` was never used.
            GPA, comms = DSBM_PA.sample(a=PA_kwargs['c'],
                                        **PA_kwargs,
                                        Herm=False,
                                        random_state=int(seed))
            print(GPA.shape)
            out_degrees = GPA.sum(axis=1)
            in_degrees = GPA.sum(axis=0)
            τ = out_degrees.mean()
            _, m = GPA.shape
            Pτ = scipy.sparse.spdiags(in_degrees + τ, 0, m, m)
            print('computed P')
            # The row sums are reshaped to a single row, the layout passed to
            # spdiags for one diagonal.
            Oτ = scipy.sparse.spdiags(
                np.reshape(out_degrees + τ, (1, -1)), 0, m, m)
            print('computed O')
            tmp1 = Oτ.power(-0.5)
            print('generated tmp1')
            tmp2 = Pτ.power(-0.5)
            print('generated tmp2')
            print(tmp1.shape, GPA.shape, tmp2.shape)
            L = tmp1 @ GPA @ tmp2
            U, Σ, Vh = la.svds(L, k=10)
            top = Σ.argsort()[-3:]  # indices of the three largest values
            spectrum = {
                'L': U[:, top],
                # svds returns the right factor as vh with shape (k, N): the
                # right singular vectors are its ROWS, so select rows and
                # transpose to get one vector per column, like 'L'.
                'R': Vh[top, :].T,
                'comms': comms
            }
            print(spectrum['L'].shape)
            spectra.append(spectrum)

        figP, axesP = plt.subplots(1, 3, figsize=(12, 3), sharey='row')
        figP.suptitle('top left singular vectors')
        spectrum = spectra[np.random.choice(np.arange(len(spectra)))]
        for j in range(3, 0, -1):
            column = spectrum['L'][:, -j]
            gL = np.array(column).reshape(-1)
            num = 0  # running x-offset so communities are drawn side by side
            for comm in spectrum['comms']:
                print(column.shape)
                print(gL[comm].shape, gL[comm])
                axesP[j - 1].bar(x=num + np.arange(len(comm)), height=gL[comm])
                num += len(comm)

        figP.savefig(f'DSBM_PA_thicker_tail_k{k}_N{k*n}_noise{η}.pdf')
        # pyplot keeps every figure alive until it is closed.
        plt.close(figP)
Beispiel #9
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the NCyMG experiments: DiSim vs. Herm on a cycle meta-graph.

    When ``norm`` is ``None``, for each edge probability ``p`` one DSBM and
    one DSBM_PA graph are sampled from the same cycle meta-graph and each is
    clustered with DiSim and, after ``hermify.to_herm``, with Herm. Every
    score is appended as a single header-less row to ``results/{tag}.csv``.
    For any other ``norm`` only the meta-graph is built.

    Args:
        seed: Random state for graph sampling.
        noise: Noise parameter η of ``DSBM.cycle``; written to the ``noise``
            column.
        tag: Base name of the CSV file inside ``results/``.
        norm: Normalisation mode; only ``None`` triggers experiments here.
    """
    print("Running NCyMG-experiments for seed", seed)
    k = 5
    n = 4000
    F = DSBM.cycle(k=k, η=noise)

    def record(model, p, algorithm, ari):
        """Append one result row to the CSV file."""
        row = {
            'model': [model],
            'p': [p],
            'noise': [noise],
            'algorithm': [algorithm],
            'ari': [ari],
        }
        pd.DataFrame(data=row).to_csv(f"results/{tag}.csv", mode='a', header=False)

    if norm is None:
        for p in [0.002, 0.004, 0.006, 0.008]:
            q = p
            PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
            A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed, Herm=False)
            print("finished sampling dsbm. moving to clustering.")
            cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
            record('DSBM', p, 'DiSim', evaluate.ari(comms, cls))
            print('finished disim-dsbm, moving on to herm-dsbm')
            A = hermify.to_herm(A)
            cls = herm.cluster(A=A, k=k, norm=norm)
            record('DSBM', p, 'Herm', evaluate.ari(comms, cls))
            print('finished herm-dsbm, movnig on to disim-pa')

            A, comms = DSBM_PA.sample(random_state=seed, a=PA_kwargs['c'], Herm=False, **PA_kwargs)
            cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
            ari = evaluate.ari(comms, cls)
            print("finished disim-pa, moving on to herm-pa")
            record('DSBM_PA', p, 'DiSim', ari)
            A = hermify.to_herm(A)
            cls = herm.cluster(A=A, k=k, norm=norm)
            record('DSBM_PA', p, 'Herm', evaluate.ari(comms, cls))
            print(f"Concluded experiments for p={p}")