def FL_sparse(data):
    """Build a sparse-mode facility-location function from data and maximize it.

    Case 1: the caller provides only the data; the function is created with
    ``mode="sparse"``, the euclidean metric and ``num_neigh=10``.

    Args:
        data: Passed to ``FacilityLocationFunction`` as ``data``. The ground
            set size is hard-coded to 43, so presumably 43 samples are
            expected (TODO confirm against the caller).

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    settings = dict(n=43, data=data, mode="sparse", metric="euclidean",
                    num_neigh=10)
    fl = FacilityLocationFunction(**settings)
    # Budget of 10 with the 'NaiveGreedy' optimizer; the three trailing flags
    # are passed positionally exactly as before.
    fl.maximize(10, 'NaiveGreedy', False, False, False)
def FL_clustered_case1(data):
    """Build a clustered-mode facility-location function from data and maximize it.

    Case 1: the caller provides only the data (no cluster labels); the
    function is created with ``mode="clustered"``, the euclidean metric and
    ``num_cluster=10``.

    Args:
        data: Passed to ``FacilityLocationFunction`` as ``data``. The ground
            set size is hard-coded to 43, so presumably 43 samples are
            expected (TODO confirm against the caller).

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    settings = dict(n=43, data=data, mode="clustered", metric="euclidean",
                    num_cluster=10)
    fl = FacilityLocationFunction(**settings)
    # Budget of 10 with the 'NaiveGreedy' optimizer; the three trailing flags
    # are passed positionally exactly as before.
    fl.maximize(10, 'NaiveGreedy', False, False, False)
def test_4_3(self):
    """evaluate() must reject an X that is not a subset of the ground set.

    Raises:
        AssertionError: If ``evaluate`` raises with an unexpected message, or
            does not raise at all.
    """
    M = np.array([[1, 2], [3, 4]])
    obj = FacilityLocationFunction(n=2, sijs=M)
    X = {0, 2}  # element 2 is out of range for n=2
    try:
        obj.evaluate(X)
    except Exception as e:
        assert str(e) == "ERROR: X is not a subset of ground set"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "evaluate() did not raise for X outside the ground set")
def test_4_4(self):
    """marginalGain() must reject an X that is not a subset of the ground set.

    Raises:
        AssertionError: If ``marginalGain`` raises with an unexpected message,
            or does not raise at all.
    """
    M = np.array([[1, 2], [3, 4]])
    obj = FacilityLocationFunction(n=2, sijs=M)
    X = {0, 2}  # element 2 is out of range for n=2
    try:
        obj.marginalGain(X, 1)
    except Exception as e:
        assert str(e) == "ERROR: X is not a subset of ground set"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "marginalGain() did not raise for X outside the ground set")
def FL_clustered_case2(data, lab):
    """Build a clustered-mode facility-location function with user labels and maximize it.

    Case 2: the caller provides cluster information along with the data.

    Args:
        data: Passed to ``FacilityLocationFunction`` as ``data``. The ground
            set size is hard-coded to 43, so presumably 43 samples are
            expected (TODO confirm against the caller).
        lab: Cluster information, forwarded as ``cluster_lab``.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    settings = dict(n=43, data=data, cluster_lab=lab, mode="clustered",
                    metric="euclidean", num_cluster=10)
    fl = FacilityLocationFunction(**settings)
    # Budget of 10 with the 'NaiveGreedy' optimizer; the three trailing flags
    # are passed positionally exactly as before.
    fl.maximize(10, 'NaiveGreedy', False, False, False)
def fl_dense_cpp_kernel():
    """Create a dense facility-location function from raw data and maximize it.

    Reads ``num_samples``, ``dataArray``, ``budget`` and ``optimizer`` from
    the enclosing scope. No kernel is computed here; the data is handed to
    the constructor directly (the function name suggests the kernel is then
    built on the C++ side -- not visible from this block).

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    fl = FacilityLocationFunction(n=num_samples, mode="dense",
                                  data=dataArray, metric="euclidean")
    run_options = dict(budget=budget, optimizer=optimizer,
                       stopIfZeroGain=False, stopIfNegativeGain=False,
                       verbose=False)
    fl.maximize(**run_options)
def fl_dense_py_kernel():
    """Create a dense facility-location function from a precomputed kernel and maximize it.

    The dense euclidean kernel is built with ``create_kernel`` and passed to
    the constructor as ``sijs``. Reads ``num_samples``, ``dataArray``,
    ``budget`` and ``optimizer`` from the enclosing scope.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    kernel = create_kernel(dataArray, mode='dense', metric='euclidean')
    fl = FacilityLocationFunction(n=num_samples, mode="dense", sijs=kernel,
                                  separate_rep=False)
    run_options = dict(budget=budget, optimizer=optimizer,
                       stopIfZeroGain=False, stopIfNegativeGain=False,
                       verbose=False)
    fl.maximize(**run_options)
def fl_dense_py_kernel_np_numba_array64():
    """Maximize a dense facility-location function built from a helper-made kernel.

    The kernel comes from ``helper.create_kernel_dense_np_numba`` with the
    euclidean metric and is handed over with ``pybind_mode="array64"``.
    Reads ``num_samples``, ``dataArray``, ``budget`` and ``optimizer`` from
    the enclosing scope.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    kernel = helper.create_kernel_dense_np_numba(dataArray, 'euclidean')
    fl = FacilityLocationFunction(n=num_samples, mode="dense", sijs=kernel,
                                  separate_rep=False, pybind_mode="array64")
    run_options = dict(budget=budget, optimizer=optimizer,
                       stopIfZeroGain=False, stopIfNegativeGain=False,
                       verbose=False)
    fl.maximize(**run_options)
def f_3():
    """Return a facility-location function over a 5x5 mixed-sign similarity matrix.

    A more realistic test case than the tiny integer fixtures: the matrix
    holds positive and negative fractional similarities.

    Returns:
        The ``FacilityLocationFunction`` built with ``n=5`` and the matrix
        passed as ``sijs``.
    """
    similarities = np.array([
        [-0.78569, 0.75, 0.9, -0.56, 0.005],
        [0.00006, 0.400906, -0.203, 0.9765, -0.9],
        [0.1, 0.3, 0.5, 0.0023, 0.9],
        [-0.1, 0.1, 0.1405, 0.0023, 0.3],
        [-0.123456, 0.0789, 0.00456, 0.001, -0.9],
    ])
    return FacilityLocationFunction(n=5, sijs=similarities)
def test_4_2(self):
    """The constructor must reject an ``n`` inconsistent with the matrix size.

    ``M`` has two rows but ``n=1`` is requested.

    Raises:
        AssertionError: If the constructor raises with an unexpected message,
            or does not raise at all.
    """
    M = np.array([[1, 2, 3], [4, 5, 6]])
    try:
        FacilityLocationFunction(n=1, sijs=M)
    except Exception as e:
        # The expected text (including its spelling) must match the message
        # emitted by the library verbatim.
        assert str(
            e
        ) == "ERROR: Inconsistentcy between n and no of examples in the given similarity matrix"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "FacilityLocationFunction did not raise for n inconsistent "
            "with the similarity matrix")
def test_4_1(self):
    """The constructor must reject a non-square dense similarity matrix.

    ``M`` is 2x3 while ``n=2`` is requested.

    Raises:
        AssertionError: If the constructor raises with an unexpected message,
            or does not raise at all.
    """
    M = np.array([[1, 2, 3], [4, 5, 6]])
    try:
        FacilityLocationFunction(n=2, sijs=M)
    except Exception as e:
        assert str(
            e
        ) == "ERROR: Dense similarity matrix should be a square matrix if ground and master datasets are same"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "FacilityLocationFunction did not raise for a non-square "
            "dense similarity matrix")
def f_5():
    """Return a dense-mode, cosine-metric facility-location function over five samples.

    Returns:
        The ``FacilityLocationFunction`` built with ``n=5`` from a fixed
        5x4 data array.
    """
    samples = np.array([
        [100, 21, 365, 5],
        [57, 18, -5, -6],
        [16, 255, 68, -8],
        [2, 20, 6, 2000],
        [12, 20, 68, 200],
    ])
    return FacilityLocationFunction(n=5, data=samples, mode="dense",
                                    metric="cosine")
def test_4_6(self):
    """The constructor must reject a ground set of size zero (``n == 0``).

    Raises:
        AssertionError: If the constructor raises with an unexpected message,
            or does not raise at all.
    """
    data = np.array([[1, 2], [3, 4]])
    # The sparse kernel is built outside the try block so that a failure in
    # create_kernel is reported as such instead of being compared against
    # the expected constructor message.
    num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
    try:
        FacilityLocationFunction(n=0, sijs=M, num_neigh=num_neigh)
    except Exception as e:
        assert str(
            e) == "ERROR: Number of elements in ground set can't be 0"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "FacilityLocationFunction did not raise for n == 0")
def test():
    """Maximize FacilityLocation on a tiny synthetic blob dataset and print the picks.

    Nine 2-D points in three clusters are generated with ``make_blobs``
    (fixed ``random_state``), a dense euclidean facility-location function
    is built over them and maximized with 'LazyGreedy' for a budget of 4.

    Returns:
        None; progress and the greedy result are printed.
    """
    # Small settings; the alternatives tried previously were
    # clusters 10/100, std-dev 4, samples 500/5000 and budget 10.
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 9
    budget = 4

    blob_options = dict(n_samples=num_samples, centers=num_clusters,
                        n_features=2, cluster_std=cluster_std_dev,
                        center_box=(0, 100), return_centers=True,
                        random_state=4)
    points, cluster_ids, centers = make_blobs(**blob_options)

    data = [tuple(point) for point in points]
    xs = [coords[0] for coords in data]
    ys = [coords[1] for coords in data]
    # (a scatter plot of xs/ys used to be drawn here)
    dataArray = np.array(data)

    # DisparitySumFunction (submodlib.functions.disparitySum) can be swapped
    # in here with the same constructor arguments.
    from submodlib.functions.facilityLocation import FacilityLocationFunction
    obj = FacilityLocationFunction(n=num_samples, mode="dense",
                                   data=dataArray, metric="euclidean")
    print("Testing FacilityLocation's maximize")

    # Other optimizers that can be tried with identical arguments:
    # 'NaiveGreedy', 'StochasticGreedy', 'LazierThanLazyGreedy'.
    greedyList = obj.maximize(budget=budget, optimizer='LazyGreedy',
                              stopIfZeroGain=False, stopIfNegativeGain=False,
                              verbose=False)
    print(f"Greedy vector: {greedyList}")

    # Coordinates of the selected points; each entry of the greedy result is
    # indexed with [0] to obtain the point index.
    greedyXs = [xs[entry[0]] for entry in greedyList]
    greedyYs = [ys[entry[0]] for entry in greedyList]
def f_7():
    """Return a clustered-mode, euclidean facility-location function over five samples.

    Returns:
        The ``FacilityLocationFunction`` built with ``n=5`` from a fixed
        5x4 data array and ``num_cluster=2``.
    """
    samples = np.array([
        [100, 21, 365, 5],
        [57, 18, -5, -6],
        [16, 255, 68, -8],
        [2, 20, 6, 2000],
        [12, 20, 68, 200],
    ])
    cluster_count = 2
    return FacilityLocationFunction(n=5, data=samples, mode="clustered",
                                    metric="euclidean",
                                    num_cluster=cluster_count)
def test_4_5(self):
    """The constructor must reject a sparse matrix given without ``num_neigh``.

    A sparse kernel is created with one neighbour, but the neighbour count
    is deliberately not forwarded to the constructor.

    Raises:
        AssertionError: If the constructor raises with an unexpected message,
            or does not raise at all.
    """
    data = np.array([[1, 2], [3, 4]])
    num_neigh, M = create_kernel(data, 'sparse', 'euclidean', num_neigh=1)
    try:
        # It is important for the user to pass num_neigh with a sparse
        # matrix, because otherwise there is no way for the Python FL and
        # C++ FL to know how many nearest neighbours were retained in the
        # sparse matrix.
        FacilityLocationFunction(n=2, sijs=M)
    except Exception as e:
        assert str(
            e) == "ERROR: num_neigh for given sparse matrix not provided"
    else:
        # Without this branch the test would pass silently when no error
        # is raised, hiding a missing validation.
        raise AssertionError(
            "FacilityLocationFunction did not raise for a sparse matrix "
            "without num_neigh")
def create_fl_dense_py_kernel(num_samples, pyDenseKernel):
    """Create a dense facility-location function from a precomputed kernel.

    Args:
        num_samples: Ground set size, forwarded as ``n``.
        pyDenseKernel: Precomputed dense kernel, forwarded as ``sijs``.

    Returns:
        The constructed ``FacilityLocationFunction`` (``separate_rep=False``).
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=pyDenseKernel,
        separate_rep=False,
    )
    return fl
def fl_dense_py_kernel_other_array():
    """Maximize a dense facility-location function built from a helper-made kernel.

    The kernel comes from ``helper.create_kernel`` with ``method="other"``
    and is handed over with ``pybind_mode="array"``. Reads ``num_samples``,
    ``dataArray``, ``budget`` and ``optimizer`` from the enclosing scope.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    kernel = helper.create_kernel(dataArray, mode="dense",
                                  metric='euclidean', method="other")
    fl = FacilityLocationFunction(n=num_samples, mode="dense", sijs=kernel,
                                  separate_rep=False, pybind_mode="array")
    run_options = dict(budget=budget, optimizer=optimizer,
                       stopIfZeroGain=False, stopIfNegativeGain=False,
                       verbose=False, show_progress=False)
    fl.maximize(**run_options)
def create_fl_dense_cpp_kernel(num_samples, dataArray):
    """Create a dense, euclidean facility-location function directly from data.

    Args:
        num_samples: Ground set size, forwarded as ``n``.
        dataArray: Raw data, forwarded as ``data``.

    Returns:
        The constructed ``FacilityLocationFunction``.
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        data=dataArray,
        metric="euclidean",
    )
    return fl
def test():
    """Compare clustered FacilityLocation with the generic ClusteredFunction on toy data.

    Eight 2-D points in three clusters are generated with ``make_blobs``.
    Two candidate subsets are formed -- one drawn from cluster 1 only, one
    with a point from each cluster -- and for both function objects the
    evaluations, marginal gains, memoized variants and the 'NaiveGreedy'
    result are printed.

    Returns:
        None; everything is reported through ``print``.
    """
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 8
    num_set = 3
    budget = 4

    blob_options = dict(n_samples=num_samples, centers=num_clusters,
                        n_features=2, cluster_std=cluster_std_dev,
                        center_box=(0,100), return_centers=True,
                        random_state=4)
    points, cluster_ids, centers = make_blobs(**blob_options)
    data = [tuple(point) for point in points]
    xs = [coords[0] for coords in data]
    ys = [coords[1] for coords in data]

    # Pick num_set data points that all belong to cluster #1.
    random.seed(1)
    cluster1Indices = [pos for pos, label in enumerate(cluster_ids)
                       if label == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    # Coordinates are kept for the (currently disabled) scatter plots.
    subset1xs = [xs[idx] for idx in subset1]
    subset1ys = [ys[idx] for idx in subset1]
    # The last sampled point is held back to measure its marginal gain.
    set1 = set(subset1[:-1])

    # Pick num_set data points from different clusters: the first point
    # found for each cluster id.
    labels = cluster_ids.tolist()
    subset2 = [labels.index(cluster) for cluster in range(num_set)]
    subset2xs = [xs[idx] for idx in subset2]
    subset2ys = [ys[idx] for idx in subset2]
    set2 = set(subset2[:-1])

    dataArray = np.array(data)

    def _report(fn):
        # Print evaluations, gains, memoized variants and the greedy result.
        print(f"Subset 1's FL value = {fn.evaluate(set1)}")
        print(f"Subset 2's FL value = {fn.evaluate(set2)}")
        print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGain(set1, subset1[-1])}")
        print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {fn.marginalGain(set1, subset2[-1])}")
        fn.setMemoization(set1)
        print(f"Subset 1's Fast FL value = {fn.evaluateWithMemoization(set1)}")
        print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGainWithMemoization(set1, subset1[-1])}")
        picks = fn.maximize(budget=budget,optimizer='NaiveGreedy',
                            stopIfZeroGain=False, stopIfNegativeGain=False,
                            verbose=False)
        print(f"Greedy vector: {picks}")

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    obj5 = FacilityLocationFunction(n=num_samples, data=dataArray,
                                    mode="clustered", metric="euclidean",
                                    num_clusters=num_clusters)
    _report(obj5)

    from submodlib import ClusteredFunction
    obj7 = ClusteredFunction(n=num_samples, mode="multi",
                             f_name='FacilityLocation', metric='euclidean',
                             data=dataArray, num_clusters=num_clusters)
    _report(obj7)
# Smoke-run script: builds a sparse-mode, cosine-metric facility-location
# function over five samples and prints the result of a greedy maximization.
import pytest
import numpy as np
from scipy import sparse
import scipy
from submodlib.functions.facilityLocation import FacilityLocationFunction
from submodlib.helper import create_kernel
from submodlib_cpp import FacilityLocation

# Five samples with four features each.
data=np.array([ [100, 21, 365, 5], [57, 18, -5, -6], [16, 255, 68, -8], [2,20,6, 2000], [12,20,68, 200] ])
s = {1}  # not used by the statements visible here
# NOTE(review): no neighbour count is passed for sparse mode -- confirm the
# constructor supplies a default.
obj = FacilityLocationFunction(n=5, data=data, mode="sparse", metric="cosine")
# Budget of 3 with the 'NaiveGreedy' optimizer; the three trailing flags are
# passed positionally.
print(obj.maximize(3,'NaiveGreedy', False, False, False))
def create_fl_mode_user():
    """Create a clustered facility-location function with user-supplied labels.

    Reads ``num_samples``, ``dataArray``, ``num_clusters`` and
    ``cluster_ids`` from the enclosing scope; the labels are converted to a
    plain list before being passed as ``cluster_labels``.

    Returns:
        The constructed ``FacilityLocationFunction``.
    """
    labels = cluster_ids.tolist()
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_labels=labels,
    )
    return fl
def create_fl_mode_birch():
    """Create a clustered facility-location function without user-supplied labels.

    Reads ``num_samples``, ``dataArray`` and ``num_clusters`` from the
    enclosing scope. No ``cluster_labels`` are given, so clustering is left
    to the library (the function name suggests BIRCH -- not visible here).

    Returns:
        The constructed ``FacilityLocationFunction``.
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
    )
    return fl
#A dryrun of implemented code with dummy data import numpy as np from submodlib.functions.facilityLocation import FacilityLocationFunction from submodlib.helper import create_kernel data = np.array([[1, 2, 3], [3, 4, 5], [4, 5, 6]]) #dryrun of create_kernel n_, K_dense = create_kernel(data, 'dense', 'euclidean') print(K_dense) n_, K_sparse = create_kernel(data, 'sparse', 'euclidean', num_neigh=2) print(K_sparse) #dryrun of C++ FL and Python FL when user provides similarity matrix #1) with dense matrix obj = FacilityLocationFunction(n=3, sijs=K_dense) X = {1} print(obj.evaluate(X)) X = {1, 2} print(obj.evaluate(X)) X = {1} print(obj.marginalGain(X, 2)) #2) with sparse matrix obj = FacilityLocationFunction(n=3, sijs=K_sparse, num_neigh=2) #dryrun of C++ FL and Python FL when user provides data #1) with dense mode obj = FacilityLocationFunction(n=3, data=data, mode="dense",
def f_1():
    """Return a facility-location function over a small 3x3 integer matrix.

    A simple test case whose expected values are easy to calculate by hand.

    Returns:
        The ``FacilityLocationFunction`` built with ``n=3`` and the matrix
        passed as ``sijs``.
    """
    similarities = np.array([
        [1, 3, 2],
        [5, 4, 3],
        [4, 7, 5],
    ])
    return FacilityLocationFunction(n=3, sijs=similarities)
def create_fl_sparse_cpp_kernel(num_samples, dataArray, num_neighbors):
    """Create a sparse, euclidean facility-location function directly from data.

    Args:
        num_samples: Ground set size, forwarded as ``n``.
        dataArray: Raw data, forwarded as ``data``.
        num_neighbors: Neighbour count, forwarded as ``num_neighbors``.

    Returns:
        The constructed ``FacilityLocationFunction``.
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="sparse",
        data=dataArray,
        metric="euclidean",
        num_neighbors=num_neighbors,
    )
    return fl
def f_2():
    """Return a facility-location function for the boundary case of one element.

    Returns:
        The ``FacilityLocationFunction`` built with ``n=1`` and a
        single-value array passed as ``sijs``.
    """
    # NOTE(review): the array is one-dimensional here, whereas the other
    # fixtures pass 2-D matrices -- confirm the constructor accepts this.
    single_value = np.array([-0.78569])
    return FacilityLocationFunction(n=1, sijs=single_value)
def FL_case2(M):
    """Build a facility-location function from a user-provided kernel and maximize it.

    Case 2: the caller directly provides the kernel instead of raw data.

    Args:
        M: Kernel forwarded as ``sijs``; it is paired with ``num_neigh=10``
            and a hard-coded ground set size of 43.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    settings = dict(n=43, sijs=M, num_neigh=10)
    fl = FacilityLocationFunction(**settings)
    # Budget of 10 with the 'NaiveGreedy' optimizer; the three trailing flags
    # are passed positionally exactly as before.
    fl.maximize(10, 'NaiveGreedy', False, False, False)
def create_fl_sparse_py_kernel(num_samples, pySparseKernel, num_neighbors):
    """Create a sparse facility-location function from a precomputed kernel.

    Args:
        num_samples: Ground set size, forwarded as ``n``.
        pySparseKernel: Precomputed sparse kernel, forwarded as ``sijs``.
        num_neighbors: Neighbour count, forwarded as ``num_neighbors``.

    Returns:
        The constructed ``FacilityLocationFunction``.
    """
    fl = FacilityLocationFunction(
        n=num_samples,
        mode="sparse",
        sijs=pySparseKernel,
        num_neighbors=num_neighbors,
    )
    return fl
def fl_mode_user():
    """Maximize a clustered facility-location function with user-supplied labels.

    Reads ``num_samples``, ``dataArray``, ``num_clusters``, ``cluster_ids``,
    ``budget`` and ``optimizer`` from the enclosing scope; the labels are
    converted to a plain list before being passed as ``cluster_labels``.

    Returns:
        None; the result of ``maximize`` is discarded.
    """
    labels = cluster_ids.tolist()
    fl = FacilityLocationFunction(n=num_samples, mode="clustered",
                                  data=dataArray, metric="euclidean",
                                  num_clusters=num_clusters,
                                  cluster_labels=labels)
    run_options = dict(budget=budget, optimizer=optimizer,
                       stopIfZeroGain=False, stopIfNegativeGain=False,
                       verbose=False)
    fl.maximize(**run_options)