def FL_sparse(data):
    """Build a sparse-mode facility-location function from raw data and maximize it.

    Case 1: the caller supplies only ``data``; no precomputed kernel is passed.
    The ground-set size (43), the neighbour count (10) and the first positional
    argument to ``maximize`` (10) are hard-coded. The result of ``maximize`` is
    discarded, so this function returns ``None``.

    Args:
        data: Forwarded unchanged as the ``data`` keyword of
            ``FacilityLocationFunction``.
    """
    # NOTE(review): another call site in this file spells this keyword
    # `num_neighbors`; confirm which spelling the installed submodlib expects.
    facility_location = FacilityLocationFunction(
        n=43,
        data=data,
        mode="sparse",
        metric="euclidean",
        num_neigh=10,
    )
    # Arguments are deliberately left positional, exactly as the original call.
    facility_location.maximize(10, 'NaiveGreedy', False, False, False)
def FL_clustered_case1(data):
    """Build a clustered-mode facility-location function from raw data and maximize it.

    Case 1: the caller supplies only ``data``; no cluster labels are passed.
    The ground-set size (43), the cluster count (10) and the first positional
    argument to ``maximize`` (10) are hard-coded. The result of ``maximize`` is
    discarded, so this function returns ``None``.

    Args:
        data: Forwarded unchanged as the ``data`` keyword of
            ``FacilityLocationFunction``.
    """
    # NOTE(review): other call sites in this file spell this keyword
    # `num_clusters`; confirm which spelling the installed submodlib expects.
    facility_location = FacilityLocationFunction(
        n=43,
        data=data,
        mode="clustered",
        metric="euclidean",
        num_cluster=10,
    )
    # Arguments are deliberately left positional, exactly as the original call.
    facility_location.maximize(10, 'NaiveGreedy', False, False, False)
def FL_clustered_case2(data, lab):
    """Build a clustered-mode facility-location function with caller-supplied labels.

    Case 2: the caller provides cluster information (``lab``) along with the
    data. The ground-set size (43), the cluster count (10) and the first
    positional argument to ``maximize`` (10) are hard-coded. The result of
    ``maximize`` is discarded, so this function returns ``None``.

    Args:
        data: Forwarded unchanged as the ``data`` keyword.
        lab: Forwarded unchanged as the ``cluster_lab`` keyword.
    """
    # NOTE(review): another call site in this file uses `cluster_labels` and
    # `num_clusters`; confirm which spellings the installed submodlib expects.
    facility_location = FacilityLocationFunction(
        n=43,
        data=data,
        cluster_lab=lab,
        mode="clustered",
        metric="euclidean",
        num_cluster=10,
    )
    # Arguments are deliberately left positional, exactly as the original call.
    facility_location.maximize(10, 'NaiveGreedy', False, False, False)
def fl_dense_cpp_kernel():
    """Maximize a dense-mode facility-location function built directly from data.

    No precomputed kernel is passed; ``FacilityLocationFunction`` receives the
    raw ``dataArray`` and the ``"euclidean"`` metric. Relies on ``num_samples``,
    ``dataArray``, ``budget`` and ``optimizer`` being defined outside this
    function. The result of ``maximize`` is discarded (returns ``None``).
    """
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        data=dataArray,
        metric="euclidean",
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def fl_dense_py_kernel():
    """Maximize a dense-mode facility-location function over a precomputed kernel.

    The kernel is produced by ``create_kernel`` (dense mode, euclidean metric)
    and handed to ``FacilityLocationFunction`` through ``sijs``. Relies on
    ``num_samples``, ``dataArray``, ``budget`` and ``optimizer`` being defined
    outside this function. The result of ``maximize`` is discarded (returns
    ``None``).
    """
    dense_kernel = create_kernel(dataArray, mode='dense', metric='euclidean')
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=dense_kernel,
        separate_rep=False,
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def fl_dense_py_kernel_np_numba_array64():
    """Maximize a dense-mode facility-location function over a helper-built kernel.

    The kernel comes from ``helper.create_kernel_dense_np_numba`` (called with
    ``dataArray`` and ``'euclidean'``) and is passed through ``sijs`` with
    ``pybind_mode="array64"``. Relies on ``helper``, ``num_samples``,
    ``dataArray``, ``budget`` and ``optimizer`` being defined outside this
    function. The result of ``maximize`` is discarded (returns ``None``).
    """
    dense_kernel = helper.create_kernel_dense_np_numba(dataArray, 'euclidean')
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=dense_kernel,
        separate_rep=False,
        pybind_mode="array64",
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def test():
    """Demo: run LazyGreedy facility-location selection on a tiny blob dataset.

    Generates 9 two-feature points around 3 centres with ``make_blobs``
    (``random_state=4``), builds a dense euclidean ``FacilityLocationFunction``
    over them, maximizes it with a budget of 4 and prints the result.
    Relies on ``make_blobs`` and ``np`` being available outside this function.
    Returns ``None``; output goes to stdout only.
    """
    # Small problem size; earlier experiments used larger values for each knob.
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 9
    budget = 4

    points, _cluster_ids, _centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0, 100),
        return_centers=True,
        random_state=4,
    )

    data = [tuple(point) for point in points]
    xs = [point[0] for point in data]
    ys = [point[1] for point in data]
    dataArray = np.array(data)

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        data=dataArray,
        metric="euclidean",
    )
    print("Testing FacilityLocation's maximize")

    # 'NaiveGreedy', 'StochasticGreedy' and 'LazierThanLazyGreedy' were the
    # alternative optimizer names tried here.
    greedyList = facility_location.maximize(
        budget=budget,
        optimizer='LazyGreedy',
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
    print(f"Greedy vector: {greedyList}")

    # Coordinates of the selected points; the first item of each entry in the
    # result is used as an index into xs / ys. Computed but not used further.
    greedyXs = [xs[picked[0]] for picked in greedyList]
    greedyYs = [ys[picked[0]] for picked in greedyList]
def fl_dense_py_kernel_other_array():
    """Maximize a dense-mode facility-location function over a ``method="other"`` kernel.

    The kernel comes from ``helper.create_kernel`` (dense mode, euclidean
    metric, ``method="other"``) and is passed through ``sijs`` with
    ``pybind_mode="array"``. ``maximize`` is called with ``show_progress=False``.
    Relies on ``helper``, ``num_samples``, ``dataArray``, ``budget`` and
    ``optimizer`` being defined outside this function. The result of
    ``maximize`` is discarded (returns ``None``).
    """
    dense_kernel = helper.create_kernel(
        dataArray,
        mode="dense",
        metric='euclidean',
        method="other",
    )
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="dense",
        sijs=dense_kernel,
        separate_rep=False,
        pybind_mode="array",
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
        show_progress=False,
    )
def test():
    """Demo: compare clustered FacilityLocation with ClusteredFunction on blobs.

    Generates 8 two-feature points around 3 centres with ``make_blobs``
    (``random_state=4``), then builds two subsets: one sampled from the points
    labelled 1, and one made of the first point found for each label
    0..num_set-1. For a clustered ``FacilityLocationFunction`` and then for a
    ``ClusteredFunction`` (``mode="multi"``, ``f_name='FacilityLocation'``) it
    prints evaluations, marginal gains, their memoized counterparts and the
    NaiveGreedy maximization result.

    Relies on ``make_blobs``, ``random`` and ``np`` being available outside
    this function. Returns ``None``; output goes to stdout only.
    """
    num_clusters = 3
    cluster_std_dev = 1
    num_samples = 8
    num_set = 3
    budget = 4

    points, cluster_ids, centers = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        n_features=2,
        cluster_std=cluster_std_dev,
        center_box=(0,100),
        return_centers=True,
        random_state=4,
    )
    data = [tuple(point) for point in points]
    xs = [point[0] for point in data]
    ys = [point[1] for point in data]

    # Subset 1: num_set points that all carry label 1 (seeded for repeatability).
    random.seed(1)
    cluster1Indices = [idx for idx, label in enumerate(cluster_ids) if label == 1]
    subset1 = random.sample(cluster1Indices, num_set)
    # Coordinates are computed but not used further in this function.
    subset1xs = [xs[idx] for idx in subset1]
    subset1ys = [ys[idx] for idx in subset1]
    # The last sampled point is held out so it can be offered as a candidate.
    set1 = set(subset1[:-1])

    # Subset 2: the first point found for each label 0..num_set-1.
    labels = cluster_ids.tolist()
    subset2 = [labels.index(label) for label in range(num_set)]
    subset2xs = [xs[idx] for idx in subset2]
    subset2ys = [ys[idx] for idx in subset2]
    set2 = set(subset2[:-1])

    dataArray = np.array(data)

    def report(fn):
        # Same sequence of queries and printed lines for either function object.
        print(f"Subset 1's FL value = {fn.evaluate(set1)}")
        print(f"Subset 2's FL value = {fn.evaluate(set2)}")
        print(f"Gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGain(set1, subset1[-1])}")
        print(f"Gain of adding another point ({subset2[-1]}) of different cluster to {set1} = {fn.marginalGain(set1, subset2[-1])}")
        fn.setMemoization(set1)
        print(f"Subset 1's Fast FL value = {fn.evaluateWithMemoization(set1)}")
        print(f"Fast gain of adding another point ({subset1[-1]}) of same cluster to {set1} = {fn.marginalGainWithMemoization(set1, subset1[-1])}")
        selection = fn.maximize(
            budget=budget,
            optimizer='NaiveGreedy',
            stopIfZeroGain=False,
            stopIfNegativeGain=False,
            verbose=False,
        )
        print(f"Greedy vector: {selection}")

    from submodlib.functions.facilityLocation import FacilityLocationFunction
    clustered_fl = FacilityLocationFunction(
        n=num_samples,
        data=dataArray,
        mode="clustered",
        metric="euclidean",
        num_clusters=num_clusters,
    )
    report(clustered_fl)

    from submodlib import ClusteredFunction
    clustered_fn = ClusteredFunction(
        n=num_samples,
        mode="multi",
        f_name='FacilityLocation',
        metric='euclidean',
        data=dataArray,
        num_clusters=num_clusters,
    )
    report(clustered_fn)
import pytest
import numpy as np
from scipy import sparse
import scipy
from submodlib.functions.facilityLocation import FacilityLocationFunction
from submodlib.helper import create_kernel
from submodlib_cpp import FacilityLocation

# Five 4-feature rows used as the ground set for the demo below.
data = np.array(
    [
        [100, 21, 365, 5],
        [57, 18, -5, -6],
        [16, 255, 68, -8],
        [2, 20, 6, 2000],
        [12, 20, 68, 200],
    ]
)

# Defined here but not referenced by any later statement in this script.
s = {1}

# Sparse-mode facility location with cosine similarity over the rows above.
obj = FacilityLocationFunction(
    n=5,
    data=data,
    mode="sparse",
    metric="cosine",
)

# Arguments are deliberately left positional, exactly as the original call.
print(
    obj.maximize(3, 'NaiveGreedy', False, False, False)
)
def FL_case2(M):
    """Build a facility-location function from a caller-supplied kernel and maximize it.

    Case 2: the caller directly provides the kernel ``M``, which is passed
    through ``sijs``. The ground-set size (43), the neighbour count (10) and
    the first positional argument to ``maximize`` (10) are hard-coded. The
    result of ``maximize`` is discarded, so this function returns ``None``.

    Args:
        M: Forwarded unchanged as the ``sijs`` keyword of
            ``FacilityLocationFunction``.
    """
    # NOTE(review): no `mode` is passed here and the keyword is spelled
    # `num_neigh`, whereas another call site in this file passes `mode=` and
    # `num_neighbors`; confirm against the installed submodlib.
    facility_location = FacilityLocationFunction(n=43, sijs=M, num_neigh=10)
    # Arguments are deliberately left positional, exactly as the original call.
    facility_location.maximize(10, 'NaiveGreedy', False, False, False)
def fl_mode_user():
    """Maximize a clustered-mode facility-location function with supplied labels.

    The cluster labels are passed explicitly as ``cluster_ids.tolist()``
    together with ``num_clusters``. Relies on ``num_samples``, ``dataArray``,
    ``num_clusters``, ``cluster_ids``, ``budget`` and ``optimizer`` being
    defined outside this function. The result of ``maximize`` is discarded
    (returns ``None``).
    """
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="clustered",
        data=dataArray,
        metric="euclidean",
        num_clusters=num_clusters,
        cluster_labels=cluster_ids.tolist(),
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )
def fl_sparse_py_kernel():
    """Maximize a sparse-mode facility-location function over a precomputed kernel.

    The kernel comes from ``helper.create_kernel`` (sparse mode, euclidean
    metric, ``num_neigh=num_neighbors``) and is passed through ``sijs``; the
    same ``num_neighbors`` value is also given to ``FacilityLocationFunction``.
    Relies on ``helper``, ``num_samples``, ``dataArray``, ``num_neighbors``,
    ``budget`` and ``optimizer`` being defined outside this function. The
    result of ``maximize`` is discarded (returns ``None``).
    """
    sparse_kernel = helper.create_kernel(
        dataArray,
        mode='sparse',
        metric='euclidean',
        num_neigh=num_neighbors,
    )
    facility_location = FacilityLocationFunction(
        n=num_samples,
        mode="sparse",
        sijs=sparse_kernel,
        num_neighbors=num_neighbors,
    )
    facility_location.maximize(
        budget=budget,
        optimizer=optimizer,
        stopIfZeroGain=False,
        stopIfNegativeGain=False,
        verbose=False,
    )