class Experiment03(object):
    """
    Experiment03 class

    Runs every algorithm for each cardinality k = 1..n, where n is the number
    of elements in the sampled dataset, and records a runtime per result.
    Algorithms that are re-run for every k report the runtime accumulated so
    far; the cost scaled variants are run once for k = n and the results for
    smaller k are derived from prefixes of that single solution.
    """

    def __init__(self, config):
        """
        Constructor

        :param config: configuration mapping; ``run`` reads
            ``config['experiment_configs']['experiment_03']`` from it
        :return:
        """
        self.config = config
        self.logger = logging.getLogger("so_logger")
        self.data_provider = DataProvider(self.config)
        self.data_exporter = DataExporter(self.config)

    @staticmethod
    def run_algorithm(args):
        """
        Run algorithm

        :param args: positional arguments forwarded to ``AlgorithmDriver.run``
        :return: whatever ``AlgorithmDriver.run`` returns
        """
        alg = AlgorithmDriver()
        data = alg.run(*args)
        return data

    def set_scaling_factor(self, data):
        """
        Find appropriate scaling factor and store it on ``data``

        A solution is computed with ``SetCoverGreedy``; its submodular value
        and cost are passed to ``data.scaling_func`` and the outcome is
        written to ``data.scaling_factor``.

        :param data: dataset object exposing ``submodular_func``,
            ``cost_func``, ``scaling_func`` and ``E``
        :return:
        """
        alg = SetCoverGreedy(self.config, data.submodular_func, data.E)
        sol = alg.run()
        submodular_val = data.submodular_func(sol)
        cost = data.cost_func(sol)
        scaling_factor = data.scaling_func(submodular_val, cost)

        # Update scaling factor
        data.scaling_factor = scaling_factor

    @staticmethod
    def _timed_run(alg, config, data, algorithm, sample_epsilon,
                   error_epsilon, scaling_factor, sample_args, k):
        """
        Run one algorithm through ``alg.run`` and measure the call

        :param alg: driver object exposing ``run``
        :param config: configuration passed through to ``alg.run``
        :param data: dataset passed through to ``alg.run``
        :param algorithm: algorithm name
        :param sample_epsilon: sampling epsilon or None
        :param error_epsilon: error epsilon or None
        :param scaling_factor: scaling factor passed through to ``alg.run``
        :param sample_args: sequence (num_sampled_skills,
            rare_sample_fraction, popular_sample_fraction, rare_threshold,
            popular_threshold, user_sample_ratio, seed)
        :param k: cardinality passed as the last argument
        :return: tuple (result, elapsed seconds)
        """
        # Build the argument tuple first so only alg.run itself is timed
        run_args = (config, data, algorithm, sample_epsilon, error_epsilon,
                    scaling_factor) + tuple(sample_args) + (k,)
        start = timer()
        result = alg.run(*run_args)
        end = timer()
        return result, end - start

    def _prefix_results(self, result, data, n, algorithm):
        """
        Derive one result per cardinality k = 1..n from a single full run

        For k smaller than the size of the full solution, the first k
        elements of ``result['sol']`` are re-evaluated with
        ``data.submodular_func`` and ``data.cost_func``. For larger k the
        full result is reused unchanged. ``result`` itself is not modified.

        :param result: result dict of the full run; 'sol', 'val' and
            'runtime' are read
        :param data: object exposing ``submodular_func`` and ``cost_func``
        :param n: largest cardinality to produce a result for
        :param algorithm: algorithm name used in the log messages
        :return: list of n result dicts, one per k, in increasing k
        """
        # NOTE(review): prefixes are only meaningful if result['sol'] keeps
        # the order in which elements were selected - confirm with the driver
        ordering = list(result['sol'])
        runtime = result['runtime']

        prefix_results = []
        for k in range(1, n + 1):
            result_k = result.copy()
            if k < len(ordering):
                sol_k = set(ordering[:k])
                submodular_val_k = data.submodular_func(sol_k)
                cost_k = data.cost_func(sol_k)
                val_k = submodular_val_k - cost_k
                result_k['sol'] = sol_k
                result_k['val'] = val_k
                result_k['submodular_val'] = submodular_val_k
                result_k['cost'] = cost_k
            else:
                sol_k = result['sol']
                val_k = result['val']
            result_k['k'] = k
            prefix_results.append(result_k)
            self.logger.info(
                "Best solution: {}\nBest value: {}".format(sol_k, val_k))
            self.logger.info(
                "Algorithm: {} and k: {} and runtime: {}".format(
                    algorithm, k, runtime))
        return prefix_results

    def run(self):
        """
        Run experiment

        Collects one result dict per (seed, user sample ratio, algorithm, k
        [, epsilon]) combination and exports them as a CSV file through the
        data exporter.

        :param:
        :return:
        """
        self.logger.info("Starting experiment 03")

        self.expt_config = self.config['experiment_configs']['experiment_03']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        user_sample_ratios = [0.05, 0.1]
        seeds = list(range(6, 10))

        sampling_epsilon_values_stochastic = [0.1, 0.05, 0.01, 0.005]
        error_epsilon_values_scaled_threshold = [0.2, 0.15, 0.1, 0.05]

        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        scaling_factor = 800

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                # Load dataset
                data = self.data_provider.read_freelancer_data_obj()
                config = self.config.copy()
                sample_args = (num_sampled_skills, rare_sample_fraction,
                               popular_sample_fraction, rare_threshold,
                               popular_threshold, user_sample_ratio, seed)
                alg.create_sample(config, data, *sample_args)

                self.logger.info(
                    "Experiment for user sample ratio: {} and scaling factor: {} and seed: {} and number of elements: {}"
                    .format(user_sample_ratio, scaling_factor, seed,
                            len(data.E)))
                self.logger.info(
                    "Scaling factor for submodular function is: {}".format(
                        scaling_factor))

                # Total number of elements
                n = len(data.E)

                # Distorted Greedy
                total_runtime = 0
                for k in range(1, n + 1):
                    result, elapsed = self._timed_run(
                        alg, self.config, data, "distorted_greedy", None,
                        None, scaling_factor, sample_args, k)
                    self.logger.debug(
                        "Previous runtime: {} new runtime: {}".format(
                            total_runtime, elapsed))
                    total_runtime += elapsed
                    result['runtime'] = total_runtime
                    results.append(result)
                    self.logger.info(
                        "Algorithm: {} and k: {} and runtime: {}".format(
                            "distorted_greedy", k, total_runtime))
                self.logger.info("\n")

                # Stochastic Distorted Greedy
                # NOTE(review): this is the only algorithm that receives the
                # per-sample copy `config` instead of self.config, and the
                # accumulated runtime is shared by all epsilon values -
                # confirm both are intended
                total_runtime = 0
                for k in range(1, n + 1):
                    for sample_epsilon in sampling_epsilon_values_stochastic:
                        result, elapsed = self._timed_run(
                            alg, config, data, "stochastic_distorted_greedy",
                            sample_epsilon, None, scaling_factor, sample_args,
                            k)
                        total_runtime += elapsed
                        result['runtime'] = total_runtime
                        results.append(result)
                        self.logger.info(
                            "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"
                            .format("stochastic_distorted_greedy",
                                    sample_epsilon, k, total_runtime))
                self.logger.info("\n")

                # Cost Scaled Greedy, then cost scaled lazy exact greedy:
                # run once for k = n and read every smaller k off the prefix
                # of that single solution
                for algorithm in ("cost_scaled_greedy",
                                  "cost_scaled_lazy_greedy"):
                    result, elapsed = self._timed_run(
                        alg, self.config, data, algorithm, None, None,
                        scaling_factor, sample_args, n)
                    result['runtime'] = elapsed
                    results.extend(
                        self._prefix_results(result, data, n, algorithm))
                    self.logger.info("\n")

                # Scaled Single Threshold Greedy
                total_runtime = 0
                for k in range(1, n + 1):
                    for error_epsilon in error_epsilon_values_scaled_threshold:
                        result, elapsed = self._timed_run(
                            alg, self.config, data,
                            "scaled_single_threshold_greedy", None,
                            error_epsilon, scaling_factor, sample_args, k)
                        total_runtime += elapsed
                        result['runtime'] = total_runtime
                        results.append(result)
                        self.logger.info(
                            "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"
                            .format("scaled_single_threshold_greedy",
                                    error_epsilon, k, total_runtime))
                self.logger.info("\n")

                # Baseline Top k
                total_runtime = 0
                for k in range(1, n + 1):
                    result, elapsed = self._timed_run(
                        alg, self.config, data, "baseline_topk", None, None,
                        scaling_factor, sample_args, k)
                    total_runtime += elapsed
                    result['runtime'] = total_runtime
                    results.append(result)
                    self.logger.info(
                        "Algorithm: {} and k: {} and runtime: {}".format(
                            "baseline_topk", k, total_runtime))
                self.logger.info("\n")

        self.logger.info("Finished experiment 03")

        # Export results; without this call the collected results are
        # discarded even though the export is logged below
        df = pd.DataFrame(results)
        self.data_exporter.export_csv_file(
            df, "experiment_03_freelancer_pop01_rare01_cost_scaled.csv")
        self.logger.info("Exported experiment_03 results")
class Experiment02(object):
    """
    Experiment02 class

    Runs a fixed list of algorithms for a set of cardinalities k on sampled
    freelancer data and exports one timed result per run.
    """

    def __init__(self, config):
        """
        Constructor

        :param config: configuration mapping; ``run`` reads
            ``config['experiment_configs']['experiment_02']`` from it
        :return:
        """
        self.config = config
        self.logger = logging.getLogger("so_logger")
        self.data_provider = DataProvider(self.config)
        self.data_exporter = DataExporter(self.config)

    @staticmethod
    def run_algorithm(args):
        """
        Run algorithm

        :param args: positional arguments forwarded to ``AlgorithmDriver.run``
        :return: whatever ``AlgorithmDriver.run`` returns
        """
        driver = AlgorithmDriver()
        return driver.run(*args)

    def set_scaling_factor(self, data):
        """
        Find appropriate scaling factor and store it on ``data``

        :param data: dataset object exposing ``submodular_func``,
            ``cost_func``, ``scaling_func`` and ``E``
        :return:
        """
        cover = SetCoverGreedy(self.config, data.submodular_func,
                               data.E).run()
        gain = data.submodular_func(cover)
        price = data.cost_func(cover)

        # Update scaling factor
        data.scaling_factor = data.scaling_func(gain, price)

    @staticmethod
    def _timed_run(driver, collected, run_args):
        """
        Call ``driver.run(*run_args)``, store the elapsed time in the
        result's 'runtime' entry and append the result to ``collected``

        :param driver: object exposing ``run``
        :param collected: list that receives the result
        :param run_args: complete positional argument tuple for ``run``
        :return: elapsed seconds
        """
        began = timer()
        outcome = driver.run(*run_args)
        elapsed = timer() - began
        outcome['runtime'] = elapsed
        collected.append(outcome)
        return elapsed

    def run(self):
        """
        Run experiment

        :param:
        :return:
        """
        log = self.logger.info
        log("Starting experiment 02")

        self.expt_config = self.config['experiment_configs']['experiment_02']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        user_sample_ratios = [0.4, 1]
        seeds = list(range(6, 10))
        ks = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

        sampling_epsilon_values_stochastic = [0.1, 0.05, 0.01, 0.005]
        error_epsilon_values_scaled_threshold = [0.2, 0.15, 0.1, 0.05]

        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        scaling_factor = 800

        plain_message = "Algorithm: {} and k: {} and runtime: {}"
        epsilon_message = "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"

        driver = AlgorithmDriver()
        collected = []
        for seed in seeds:
            for ratio in user_sample_ratios:
                log("Experiment for user sample ratio: {} and scaling factor: {} and seed: {}".format(ratio, scaling_factor, seed))

                # Load dataset
                data = self.data_provider.read_freelancer_data_obj()
                config = self.config.copy()
                sampling = (num_sampled_skills, rare_sample_fraction,
                            popular_sample_fraction, rare_threshold,
                            popular_threshold, ratio, seed)
                driver.create_sample(config, data, *sampling)

                log("Scaling factor for submodular function is: {}".format(scaling_factor))

                # One entry per algorithm, in execution order:
                # (name, config object handed to the driver,
                #  which epsilon slot is swept or None, epsilon values)
                schedule = (
                    ("distorted_greedy", self.config, None, ()),
                    ("stochastic_distorted_greedy", config, "sample",
                     sampling_epsilon_values_stochastic),
                    ("cost_scaled_greedy", self.config, None, ()),
                    ("cost_scaled_lazy_greedy", self.config, None, ()),
                    ("greedy", self.config, None, ()),
                    ("scaled_single_threshold_greedy", self.config, "error",
                     error_epsilon_values_scaled_threshold),
                    ("baseline_topk", self.config, None, ()),
                )

                for name, cfg, swept, epsilons in schedule:
                    for k in ks:
                        tail = (scaling_factor,) + sampling + (k,)
                        if swept is None:
                            # Run algorithm without any epsilon
                            elapsed = self._timed_run(
                                driver, collected,
                                (cfg, data, name, None, None) + tail)
                            log(plain_message.format(name, k, elapsed))
                            continue
                        for epsilon in epsilons:
                            # Run algorithm with the epsilon placed in the
                            # slot this algorithm sweeps
                            if swept == "sample":
                                head = (cfg, data, name, epsilon, None)
                            else:
                                head = (cfg, data, name, None, epsilon)
                            elapsed = self._timed_run(driver, collected,
                                                      head + tail)
                            log(epsilon_message.format(name, epsilon, k,
                                                       elapsed))
                    log("\n")

        log("Finished experiment 02")

        # Export results
        frame = pd.DataFrame(collected)
        self.data_exporter.export_csv_file(frame, "experiment_02_freelancer_pop01_rare01_greedy.csv")
        log("Exported experiment_02 results")
class Experiment00(object):
    """
    Experiment00 class

    Runs each algorithm once per (seed, user sample ratio) sample of the
    freelancer data, with k passed as None, and exports one timed result per
    run.
    """

    def __init__(self, config):
        """
        Constructor

        :param config: configuration mapping; ``run`` reads
            ``config['experiment_configs']['experiment_00']`` from it
        :return:
        """
        self.config = config
        self.logger = logging.getLogger("so_logger")
        self.data_provider = DataProvider(self.config)
        self.data_exporter = DataExporter(self.config)

    @staticmethod
    def run_algorithm(args):
        """
        Run algorithm

        :param args: positional arguments forwarded to ``AlgorithmDriver.run``
        :return: whatever ``AlgorithmDriver.run`` returns
        """
        alg = AlgorithmDriver()
        data = alg.run(*args)
        return data

    def set_scaling_factor(self, data):
        """
        Find appropriate scaling factor and store it on ``data``

        A solution is computed with ``SetCoverGreedy``; its submodular value
        and cost are passed to ``data.scaling_func`` and the outcome is
        written to ``data.scaling_factor``. ``run`` does not call this
        method; it uses a fixed scaling factor.

        :param data: dataset object exposing ``submodular_func``,
            ``cost_func``, ``scaling_func`` and ``E``
        :return:
        """
        alg = SetCoverGreedy(self.config, data.submodular_func, data.E)
        sol = alg.run()
        submodular_val = data.submodular_func(sol)
        cost = data.cost_func(sol)
        scaling_factor = data.scaling_func(submodular_val, cost)

        # Update scaling factor
        data.scaling_factor = scaling_factor

    @staticmethod
    def _timed_run(alg, config, data, algorithm, sample_epsilon,
                   scaling_factor, sample_args):
        """
        Run one algorithm with k = None and store its runtime in the result

        :param alg: driver object exposing ``run``
        :param config: configuration passed through to ``alg.run``
        :param data: dataset passed through to ``alg.run``
        :param algorithm: algorithm name
        :param sample_epsilon: sampling epsilon or None
        :param scaling_factor: scaling factor passed through to ``alg.run``
        :param sample_args: sequence (num_sampled_skills,
            rare_sample_fraction, popular_sample_fraction, rare_threshold,
            popular_threshold, user_sample_ratio, seed)
        :return: tuple (result with 'runtime' set, elapsed seconds)
        """
        # Error epsilon and k are always None in this experiment
        run_args = (config, data, algorithm, sample_epsilon, None,
                    scaling_factor) + tuple(sample_args) + (None,)
        start = timer()
        result = alg.run(*run_args)
        end = timer()
        result['runtime'] = end - start
        return result, end - start

    def _run_sample(self, alg, sample_args, scaling_factor,
                    sampling_epsilon_values):
        """
        Run all algorithms on one freshly loaded and sampled dataset

        :param alg: driver object exposing ``create_sample`` and ``run``
        :param sample_args: sequence (num_sampled_skills,
            rare_sample_fraction, popular_sample_fraction, rare_threshold,
            popular_threshold, user_sample_ratio, seed)
        :param scaling_factor: scaling factor passed to every algorithm
        :param sampling_epsilon_values: epsilon values for the stochastic
            distorted greedy runs
        :return: list of result dicts in execution order
        """
        # Local import keeps this block self-contained
        import copy

        user_sample_ratio, seed = sample_args[-2:]
        self.logger.info(
            "Experiment for user sample ratio: {} and scaling factor: {} and seed: {}"
            .format(user_sample_ratio, scaling_factor, seed))

        # Load dataset
        data = self.data_provider.read_freelancer_data_obj()
        # Deep copy: the stochastic runs below write a nested epsilon entry,
        # and a shallow copy would share that nested dict with self.config
        config = copy.deepcopy(self.config)
        alg.create_sample(config, data, *sample_args)

        self.logger.info(
            "Scaling factor for submodular function is: {}".format(
                scaling_factor))

        results = []

        # Algorithms that take no epsilon, in execution order
        for algorithm in ("distorted_greedy", "cost_scaled_greedy",
                          "cost_scaled_lazy_greedy", "greedy",
                          "unconstrained_linear",
                          "unconstrained_distorted_greedy"):
            result, runtime = self._timed_run(
                alg, config, data, algorithm, None, scaling_factor,
                sample_args)
            self.logger.info(
                "Algorithm: {} and k: {} and runtime: {}".format(
                    algorithm, None, runtime))
            results.append(result)
            self.logger.info("\n")

        # Stochastic distorted greedy
        for sample_epsilon in sampling_epsilon_values:
            # Set before the timer starts so only the algorithm is timed
            config['algorithms']['stochastic_distorted_greedy_config'][
                'epsilon'] = sample_epsilon
            result, runtime = self._timed_run(
                alg, config, data, "stochastic_distorted_greedy",
                sample_epsilon, scaling_factor, sample_args)
            self.logger.info(
                "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"
                .format("stochastic_distorted_greedy", sample_epsilon, None,
                        runtime))
            results.append(result)
        self.logger.info("\n")

        # Baseline top k
        result, runtime = self._timed_run(
            alg, config, data, "baseline_topk", None, scaling_factor,
            sample_args)
        self.logger.info(
            "Algorithm: {} and k: {} and runtime: {}".format(
                "baseline_topk", None, runtime))
        results.append(result)

        return results

    def run(self):
        """
        Run experiment

        :param:
        :return:
        """
        self.logger.info("Starting experiment 00")

        self.expt_config = self.config['experiment_configs']['experiment_00']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        user_sample_ratios = [
            0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1
        ]
        seeds = list(range(6, 11))

        sampling_epsilon_values = [0.1, 0.05, 0.01, 0.005]

        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        scaling_factor = 800

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                sample_args = (num_sampled_skills, rare_sample_fraction,
                               popular_sample_fraction, rare_threshold,
                               popular_threshold, user_sample_ratio, seed)
                results.extend(
                    self._run_sample(alg, sample_args, scaling_factor,
                                     sampling_epsilon_values))

        self.logger.info("Finished experiment 00")

        # Export results
        df = pd.DataFrame(results)
        self.data_exporter.export_csv_file(
            df, "experiment_00_freelancer_pop01_rare01_greedy.csv")
        self.logger.info("Exported experiment_00 results")