def run(self):
        """
        Run experiment 02: sweep the cardinality parameter ``k``.

        For every (seed, user sample ratio) pair the freelancer data object is
        read again through ``self.data_provider``, a sample is created with
        ``AlgorithmDriver.create_sample`` and every algorithm in the plan below
        is executed once per value of ``k`` (and once per epsilon value for the
        two epsilon-parameterised algorithms). The wall-clock duration of each
        call is stored under ``result['runtime']``; all results are exported
        together as one CSV file through ``self.data_exporter``.

        :param: none besides ``self`` (reads ``self.config``, ``self.logger``,
            ``self.data_provider`` and ``self.data_exporter``; sets
            ``self.expt_config``)
        :return: None
        """
        log = self.logger.info
        log("Starting experiment 02")

        self.expt_config = self.config['experiment_configs']['experiment_02']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        # Sweep grid
        user_sample_ratios = [0.4, 1]
        seeds = list(range(6, 10))
        ks = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
        sampling_epsilon_values_stochastic = [0.1, 0.05, 0.01, 0.005]
        error_epsilon_values_scaled_threshold = [0.2, 0.15, 0.1, 0.05]

        # Fixed sampling parameters
        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        scaling_factor = 800

        # Placeholder "epsilon list" for algorithms that take no epsilon:
        # exactly one call per k, with both epsilon arguments left as None.
        no_epsilon = (None,)

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                log("Experiment for user sample ratio: {} and scaling factor: {} and seed: {}".format(
                    user_sample_ratio, scaling_factor, seed))

                # Positional arguments shared by create_sample() and run()
                sampling = (num_sampled_skills, rare_sample_fraction,
                            popular_sample_fraction, rare_threshold,
                            popular_threshold, user_sample_ratio, seed)

                # Load dataset and draw the sample for this (seed, ratio) pair
                data = self.data_provider.read_freelancer_data_obj()
                config = self.config.copy()
                alg.create_sample(config, data, *sampling)

                log("Scaling factor for submodular function is: {}".format(scaling_factor))

                # (algorithm, config object, which epsilon argument, epsilon values)
                # NOTE(review): only the stochastic variant receives the copied
                # `config`; every other call receives `self.config`. Kept exactly
                # as in the original - confirm whether this is intentional.
                plan = (
                    ("distorted_greedy", self.config, None, no_epsilon),
                    ("stochastic_distorted_greedy", config, "sampling",
                     sampling_epsilon_values_stochastic),
                    ("cost_scaled_greedy", self.config, None, no_epsilon),
                    ("cost_scaled_lazy_greedy", self.config, None, no_epsilon),
                    ("greedy", self.config, None, no_epsilon),
                    ("scaled_single_threshold_greedy", self.config, "error",
                     error_epsilon_values_scaled_threshold),
                    ("baseline_topk", self.config, None, no_epsilon),
                )

                for name, cfg, epsilon_kind, epsilon_values in plan:
                    for k in ks:
                        for epsilon in epsilon_values:
                            sample_epsilon = epsilon if epsilon_kind == "sampling" else None
                            error_epsilon = epsilon if epsilon_kind == "error" else None

                            # Run algorithm and time the call
                            began = timer()
                            outcome = alg.run(cfg, data, name,
                                              sample_epsilon, error_epsilon,
                                              scaling_factor, *sampling, k)
                            elapsed = timer() - began

                            outcome['runtime'] = elapsed
                            results.append(outcome)
                            if epsilon_kind is None:
                                log("Algorithm: {} and k: {} and runtime: {}".format(
                                    name, k, elapsed))
                            else:
                                log("Algorithm: {} and epsilon: {} and k: {} and runtime: {}".format(
                                    name, epsilon, k, elapsed))

                    # Blank separator line in the log after each algorithm
                    log("\n")

        log("Finished experiment 02")

        # Export results
        self.data_exporter.export_csv_file(
            pd.DataFrame(results),
            "experiment_02_freelancer_pop01_rare01_greedy.csv")
        log("Exported experiment_02 results")
 def run_algorithm(args):
     """Create a fresh AlgorithmDriver and return the result of ``run(*args)``.

     :param args: iterable of positional arguments, unpacked into
         ``AlgorithmDriver.run``
     :return: whatever ``AlgorithmDriver.run`` returns
     """
     return AlgorithmDriver().run(*args)
    def run(self):
        """
        Run experiment 00: sweep the user sample ratio.

        For every (seed, user sample ratio) pair the freelancer data object is
        read again through ``self.data_provider``, a sample is created with
        ``AlgorithmDriver.create_sample`` and each algorithm is executed once
        with ``k`` passed as ``None`` (the stochastic variant once per sampling
        epsilon). The wall-clock duration of each call is stored under
        ``result['runtime']``; all results are exported together as one CSV
        file through ``self.data_exporter``.

        :param: none besides ``self`` (reads ``self.config``, ``self.logger``,
            ``self.data_provider`` and ``self.data_exporter``; sets
            ``self.expt_config``)
        :return: None
        """
        log = self.logger.info
        log("Starting experiment 00")

        self.expt_config = self.config['experiment_configs']['experiment_00']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        # Sweep grid
        user_sample_ratios = [
            0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1
        ]
        seeds = list(range(6, 11))
        sampling_epsilon_values = [0.1, 0.05, 0.01, 0.005]

        # Fixed sampling parameters
        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        # The scaling factor is a fixed constant here (the original carried a
        # disabled alternative that derived it via self.set_scaling_factor).
        scaling_factor = 800

        # Algorithms without an epsilon, in execution order; each one is
        # followed by a blank separator line in the log.
        deterministic_algorithms = (
            "distorted_greedy",  # Distorted greedy - ICML
            "cost_scaled_greedy",
            "cost_scaled_lazy_greedy",  # lazy exact greedy
            "greedy",
            "unconstrained_linear",
            "unconstrained_distorted_greedy",
        )
        plain_message = "Algorithm: {} and k: {} and runtime: {}"
        epsilon_message = "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                log("Experiment for user sample ratio: {} and scaling factor: {} and seed: {}"
                    .format(user_sample_ratio, scaling_factor, seed))

                # Positional arguments shared by create_sample() and run()
                sampling = (num_sampled_skills, rare_sample_fraction,
                            popular_sample_fraction, rare_threshold,
                            popular_threshold, user_sample_ratio, seed)

                # Load dataset and draw the sample for this (seed, ratio) pair
                data = self.data_provider.read_freelancer_data_obj()
                config = self.config.copy()
                alg.create_sample(config, data, *sampling)

                log("Scaling factor for submodular function is: {}".format(
                    scaling_factor))

                def launch(name, sample_epsilon=None):
                    # One driver call on the current sample; k is always None here.
                    return alg.run(config, data, name, sample_epsilon, None,
                                   scaling_factor, *sampling, None)

                for name in deterministic_algorithms:
                    began = timer()
                    outcome = launch(name)
                    elapsed = timer() - began
                    outcome['runtime'] = elapsed
                    log(plain_message.format(name, None, elapsed))
                    results.append(outcome)

                    log("\n")

                # Stochastic distorted greedy
                for sample_epsilon in sampling_epsilon_values:
                    began = timer()
                    # As in the original, the config update happens after the
                    # timer has started, so it is part of the measured runtime.
                    stochastic_config = config['algorithms'][
                        'stochastic_distorted_greedy_config']
                    stochastic_config['epsilon'] = sample_epsilon
                    outcome = launch("stochastic_distorted_greedy",
                                     sample_epsilon)
                    elapsed = timer() - began
                    outcome['runtime'] = elapsed
                    log(epsilon_message.format("stochastic_distorted_greedy",
                                               sample_epsilon, None, elapsed))
                    results.append(outcome)

                log("\n")

                # Baseline top k (no separator line is logged after this one)
                began = timer()
                outcome = launch("baseline_topk")
                elapsed = timer() - began
                outcome['runtime'] = elapsed
                log(plain_message.format("baseline_topk", None, elapsed))
                results.append(outcome)

        log("Finished experiment 00")

        # Export results
        self.data_exporter.export_csv_file(
            pd.DataFrame(results),
            "experiment_00_freelancer_pop01_rare01_greedy.csv")
        log("Exported experiment_00 results")
    def run(self):
        """
        Run experiment 03: evaluate every cardinality ``k`` from 1 to ``n``.

        For every (seed, user sample ratio) pair the freelancer data object is
        read again through ``self.data_provider`` and a sample is created with
        ``AlgorithmDriver.create_sample``; ``n`` is ``len(data.E)`` afterwards.
        Two kinds of sweep are then performed:

        * cumulative sweeps - the algorithm is called once per ``k`` (and per
          epsilon where applicable) and ``result['runtime']`` holds the running
          total of the durations measured so far for that algorithm;
        * prefix sweeps - the algorithm is called once with ``k = n`` and one
          result per ``k`` is derived from the first ``k`` items of its
          solution, all sharing the runtime of that single call.

        :param: none besides ``self`` (reads ``self.config``, ``self.logger``
            and ``self.data_provider``; sets ``self.expt_config``)
        :return: None
        """
        log = self.logger.info
        log("Starting experiment 03")

        self.expt_config = self.config['experiment_configs']['experiment_03']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']

        # Sweep grid
        user_sample_ratios = [0.05, 0.1]
        seeds = list(range(6, 10))
        sampling_epsilon_values_stochastic = [0.1, 0.05, 0.01, 0.005]
        error_epsilon_values_scaled_threshold = [0.2, 0.15, 0.1, 0.05]

        # Fixed sampling parameters
        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.1
        scaling_factor = 800

        plain_message = "Algorithm: {} and k: {} and runtime: {}"
        epsilon_message = "Algorithm: {} and epsilon: {} and k: {} and runtime: {}"

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                # Positional arguments shared by create_sample() and run()
                sampling = (num_sampled_skills, rare_sample_fraction,
                            popular_sample_fraction, rare_threshold,
                            popular_threshold, user_sample_ratio, seed)

                # Load dataset and draw the sample for this (seed, ratio) pair
                data = self.data_provider.read_freelancer_data_obj()
                config = self.config.copy()
                alg.create_sample(config, data, *sampling)

                log("Experiment for user sample ratio: {} and scaling factor: {} and seed: {} and number of elements: {}"
                    .format(user_sample_ratio, scaling_factor, seed,
                            len(data.E)))
                log("Scaling factor for submodular function is: {}".format(
                    scaling_factor))

                # Total number of elements
                n = len(data.E)

                def cumulative_sweep(name, cfg, epsilon_kind=None,
                                     epsilon_values=(None,), echo=False):
                    # One call per k (and per epsilon); the stored runtime is
                    # the running total over all calls made so far in this sweep.
                    total_runtime = 0
                    for k in range(1, n + 1):
                        for epsilon in epsilon_values:
                            sample_epsilon = epsilon if epsilon_kind == "sampling" else None
                            error_epsilon = epsilon if epsilon_kind == "error" else None
                            began = timer()
                            outcome = alg.run(cfg, data, name,
                                              sample_epsilon, error_epsilon,
                                              scaling_factor, *sampling, k)
                            elapsed = timer() - began
                            if echo:
                                print('Previous runtime:', total_runtime,
                                      'new runtime:', elapsed)
                            total_runtime += elapsed
                            outcome['runtime'] = total_runtime
                            results.append(outcome)
                            if epsilon_kind is None:
                                log(plain_message.format(name, k, total_runtime))
                            else:
                                log(epsilon_message.format(name, epsilon, k,
                                                           total_runtime))
                    log("\n")

                def prefix_sweep(name):
                    # Single call with k = n; per-k results are built from the
                    # first k items of the returned solution.
                    began = timer()
                    full = alg.run(self.config, data, name, None, None,
                                   scaling_factor, *sampling, n)
                    elapsed = timer() - began
                    full['runtime'] = elapsed
                    for k in range(1, n + 1):
                        partial = full.copy()
                        if k >= len(full['sol']):
                            # Nothing to cut off: reuse the full solution as is
                            sol_k = full['sol']
                            val_k = full['val']
                        else:
                            sol_k = set(list(full['sol'])[:k])
                            submodular_val_k = data.submodular_func(sol_k)
                            cost_k = data.cost_func(sol_k)
                            val_k = submodular_val_k - cost_k
                            partial['sol'] = sol_k
                            partial['val'] = val_k
                            partial['submodular_val'] = submodular_val_k
                            partial['cost'] = cost_k
                        partial['k'] = k
                        results.append(partial)
                        log("Best solution: {}\nBest value: {}".format(
                            sol_k, val_k))
                        log(plain_message.format(name, k, elapsed))
                    log("\n")

                # NOTE(review): only the stochastic variant receives the copied
                # `config`; every other call receives `self.config`. Kept exactly
                # as in the original - confirm whether this is intentional.
                cumulative_sweep("distorted_greedy", self.config, echo=True)
                cumulative_sweep("stochastic_distorted_greedy", config,
                                 "sampling",
                                 sampling_epsilon_values_stochastic)
                prefix_sweep("cost_scaled_greedy")
                prefix_sweep("cost_scaled_lazy_greedy")
                cumulative_sweep("scaled_single_threshold_greedy", self.config,
                                 "error",
                                 error_epsilon_values_scaled_threshold)
                cumulative_sweep("baseline_topk", self.config)

        log("Finished experiment 03")

        # Export results
        # NOTE(review): the CSV export below is disabled, so the frame is built
        # but never written even though the final log line reports an export.
        df = pd.DataFrame(results)
        # self.data_exporter.export_csv_file(df, "experiment_03_freelancer_pop01_rare01_cost_scaled.csv")
        log("Exported experiment_03 results")
# Code example #5
# 0
    def run(self):
        """
        Run experiment 04: partition-matroid algorithms on the guru data.

        For every combination of seed, user sample ratio, cardinality
        constraint and number of partitions, the guru data object is read
        again through ``self.data_provider``, a sample is created with
        ``AlgorithmDriver.create_sample`` and partitions are created with
        ``AlgorithmDriver.create_partitions``. Each algorithm is then executed
        once with ``k`` passed as ``None``. Every result is annotated with its
        runtime, cardinality constraint and number of partitions, and all
        results are exported together as one CSV file.

        :param: none besides ``self`` (reads ``self.config``, ``self.logger``,
            ``self.data_provider`` and ``self.data_exporter``; sets
            ``self.expt_config``)
        :return: None
        """
        log = self.logger.info
        log("Starting experiment 04")

        self.expt_config = self.config['experiment_configs']['experiment_04']
        popular_threshold = self.expt_config['popular_threshold']
        rare_threshold = self.expt_config['rare_threshold']
        # NOTE(review): of the three values read next, only partition_type is
        # used as read. num_of_partitions is replaced by the hard-coded range
        # below and cardinality_constraint is shadowed by the loop variable.
        # The reads are kept because a missing key still raises here.
        num_of_partitions = self.expt_config['num_of_partitions']
        partition_type = self.expt_config['partition_type']
        cardinality_constraint = self.expt_config['cardinality_constraint']

        # Sweep grid
        user_sample_ratios = [1]
        seeds = list(range(6, 10))
        cardinality_constraints = list(range(1, 11))
        num_of_partitions = list(range(1, 6))

        # Fixed sampling parameters
        num_sampled_skills = 50
        rare_sample_fraction = 0.1
        popular_sample_fraction = 0.8
        scaling_factor = 800

        # Algorithms in execution order
        matroid_algorithms = (
            "partition_matroid_greedy",
            "cost_scaled_partition_matroid_greedy",
            "cost_scaled_partition_matroid_lazy_greedy",  # lazy exact greedy
            "baseline_topk_matroid",
        )

        alg = AlgorithmDriver()
        results = []
        for seed in seeds:
            for user_sample_ratio in user_sample_ratios:
                # Positional arguments shared by create_sample() and run()
                sampling = (num_sampled_skills, rare_sample_fraction,
                            popular_sample_fraction, rare_threshold,
                            popular_threshold, user_sample_ratio, seed)

                for cardinality_constraint in cardinality_constraints:
                    for num_of_partition in num_of_partitions:
                        log("Experiment for user sample ratio: {} and scaling factor: {} and seed: {} and cardinality constraint:{} and num of partitions:{} "
                            .format(user_sample_ratio, scaling_factor, seed,
                                    cardinality_constraint, num_of_partition))

                        # Load dataset
                        data = self.data_provider.read_guru_data_obj()
                        config = self.config.copy()
                        # Create the sample, then the partitions for this setting
                        alg.create_sample(config, data, *sampling)
                        alg.create_partitions(data, num_of_partition,
                                              partition_type,
                                              cardinality_constraint)

                        log("Scaling factor for submodular function is: {}".
                            format(scaling_factor))

                        for name in matroid_algorithms:
                            began = timer()
                            outcome = alg.run(config, data, name, None, None,
                                              scaling_factor, *sampling, None)
                            elapsed = timer() - began

                            # Annotate the result with timing and the swept settings
                            outcome['runtime'] = elapsed
                            outcome['cardinality_constraint'] = cardinality_constraint
                            outcome['num_of_partitions'] = num_of_partition
                            log("Algorithm: {} and k: {} and runtime: {}".format(
                                name, None, elapsed))
                            results.append(outcome)

                            log("\n")

        log("Finished experiment 04")

        # Export results
        self.data_exporter.export_csv_file(
            pd.DataFrame(results),
            "experiment_04_guru_salary_pop08_rare01.csv")
        log("Exported experiment 04 results")