def test_db():
    """Return the experiment database instance configured for tests.

    Returns:
        The object stored in ``TestDatabase.db``. When that attribute is
        falsy, an error is reported through ``error`` and ``None`` is
        returned instead.
    """
    database = TestDatabase.db
    if database:
        return database

    error(
        "You can configure experiment database for tests by calling for_tests=True flag in setup_experiment_database"
    )
    return None
def run(self, data, knobs):
    """Compute the power of the two-sample comparison for the given data.

    The parent ``run`` is invoked first; this method relies on it having
    populated ``self.y1``. The sample size is the length of ``self.y1`` and
    the power is obtained from ``tt_ind_solve_power`` using the configured
    ``effect_size``, ``alpha`` and ``alternative``.

    Args:
        data: Samples, forwarded unchanged to the parent ``run``.
        knobs: Knob settings, forwarded unchanged to the parent ``run``.

    Returns:
        A dict with the keys ``effect_size``, ``sample_size``, ``alpha`` and
        ``power``, or ``None`` when the parent ``run`` reports failure.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; the enclosing class name is not visible here.
    parent_ok = super(self.__class__, self).run(data, knobs)
    if not parent_ok:
        error("Aborting analysis.")
        return None

    observations = len(self.y1)
    achieved_power = tt_ind_solve_power(
        effect_size=self.effect_size,
        nobs1=observations,
        alpha=self.alpha,
        alternative=self.alternative,
    )

    return {
        "effect_size": self.effect_size,
        "sample_size": observations,
        "alpha": self.alpha,
        "power": achieved_power,
    }
def user_db():
    """Return the configured user database instance.

    Returns:
        The object stored in ``UserDatabase.db``, or ``None`` when it is
        falsy (in which case an error is reported through ``error``).
    """
    instance = UserDatabase.db
    if not instance:
        error(
            "You can setup the user database using experiment_db_config.json file"
        )
    return instance if instance else None
def run(self, data, knobs):
    """Estimate the per-group sample size needed to reach ``self.power``.

    The parent ``run`` is invoked first; this method relies on it having
    populated ``self.y1`` and ``self.y2``. If ``self.effect_size`` is falsy,
    the effect size is derived from ``self.mean_diff`` divided by the pooled
    standard deviation of the two samples; otherwise the configured value is
    used as is. ``tt_ind_solve_power`` is then called with ``nobs1=None`` so
    that the sample size is the quantity being solved for.

    Args:
        data: Samples, forwarded unchanged to the parent ``run``.
        knobs: Knob settings, forwarded unchanged to the parent ``run``.

    Returns:
        A dict with the keys ``effect_size``, ``sample_size``, ``alpha`` and
        ``power``, or ``None`` when the parent ``run`` reports failure.

    Raises:
        Exception: If both ``self.effect_size`` and ``self.mean_diff`` are
            falsy, so no effect size can be determined.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; the enclosing class name is not visible here.
    if not super(self.__class__, self).run(data, knobs):
        error("Aborting analysis.")
        return

    if not self.effect_size:
        if not self.mean_diff:
            raise Exception(
                "You cannot leave both mean_diff and effect_size parameters empty"
            )
        pooled_std = sqrt((var(self.y1) + var(self.y2)) / 2)
        effect_size = self.mean_diff / pooled_std
    else:
        effect_size = self.effect_size

    sample_size = tt_ind_solve_power(
        effect_size=effect_size,
        nobs1=None,
        alpha=self.alpha,
        power=self.power,
        alternative=self.alternative,
    )

    result = dict()
    result["effect_size"] = effect_size
    # NOTE(review): the solved sample size is rounded down; rounding up may be
    # what is wanted if the requested power must be guaranteed - confirm.
    result["sample_size"] = floor(sample_size)
    result["alpha"] = self.alpha
    result["power"] = self.power
    return result
def db():
    """Return the configured experiment database instance.

    Returns:
        The object stored in ``ExperimentDatabase.DB``. When it is falsy an
        error is reported through ``error`` and ``None`` is returned.
    """
    experiment_database = ExperimentDatabase.DB
    if experiment_database:
        return experiment_database

    error(
        "You can configure experiment database using Configuration section in the dashboard."
    )
    return None
def __init__(self, stage_ids, y_key, alpha=0.05):
    """Initialise the test and validate the requested significance level.

    Args:
        stage_ids: Forwarded unchanged to the parent initialiser.
        y_key: Forwarded unchanged to the parent initialiser.
        alpha: Significance level. Only 0.15, 0.10, 0.05, 0.02 and 0.01 are
            accepted; any other value is reported through ``error`` and
            replaced by 0.05.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; the enclosing class name is not visible here.
    super(self.__class__, self).__init__(stage_ids, y_key)

    supported_alphas = (0.15, 0.10, 0.05, 0.02, 0.01)
    if alpha in supported_alphas:
        self.alpha = alpha
        return

    error("For Anderson-Darling test, please select as alpha one of 0.15, 0.10, 0.05, 0.02, or 0.01. "
          "Falling back to default value of alpha = 0.05")
    self.alpha = 0.05
def run(self, data, knobs):
    """Validate the input and store one list of values per stage in ``self.y``.

    Args:
        data: Indexable collection of samples; at least two are required.
            Only the first ``len(self.stage_ids)`` entries are copied.
        knobs: Not used by this method.

    Returns:
        ``True`` when ``self.y`` was populated, ``False`` (after reporting
        an error) when fewer than two samples were supplied.
    """
    if len(data) < 2:
        error("Cannot run " + self.name + " on less than two samples.")
        return False

    stage_total = len(self.stage_ids)
    # Each sample is copied into a fresh list.
    self.y = [list(data[position]) for position in range(stage_total)]
    return True
def run(self, data, knobs):
    """Fit an OLS model over all stages and return its ANOVA tables.

    The values of every stage are flattened into one column keyed by
    ``self.y_key``; for each knob key a column of equal length is built by
    repeating the stage's knob value once per data point of that stage.
    The data frame, the formula and the model summary are printed along
    the way.

    Args:
        data: Per-stage collections of values, indexed by stage position.
            The values are used as they are (no per-item key lookup).
        knobs: Per-stage mappings from knob key to knob value. When
            ``self.knob_keys`` is empty, the keys of ``knobs[0]`` are used,
            which assumes that all stages share the same knob keys.

    Returns:
        A tuple ``(aov_table, aov_table_sqr)``: the table produced by
        ``anova_lm`` and a deep copy of it that was passed through
        ``self.eta_squared`` and ``self.omega_squared`` and then had the
        'sum_sq', 'df', 'F' and 'PR(>F)' columns dropped. ``None`` is
        returned (after reporting errors) when fewer than two knob keys
        are available.
    """
    separator = "------------------"
    stage_positions = range(self.stages_count)

    frame_columns = {}
    frame_columns[self.y_key] = [
        value for position in stage_positions for value in data[position]
    ]

    if not self.knob_keys:
        self.knob_keys = knobs[0].keys()

    if len(self.knob_keys) < 2:
        error("Cannot run " + self.name + " on one factor.")
        error("Aborting analysis")
        return None

    for knob_key in self.knob_keys:
        # One knob value per data point keeps every column the same length.
        frame_columns[knob_key] = [
            knobs[position][knob_key]
            for position in stage_positions
            for _ in data[position]
        ]

    df = pd.DataFrame(frame_columns)
    print(df)
    print(separator)

    formula = self.create_formula()
    print(formula)
    print(separator)

    fitted_model = ols(formula, data=frame_columns).fit()
    print(fitted_model.summary())
    print(separator)

    aov_table = anova_lm(fitted_model, typ=2)

    # The return values of the two helpers are ignored, so they presumably
    # extend the copied table in place - verify against their definitions.
    aov_table_sqr = deepcopy(aov_table)
    self.eta_squared(aov_table_sqr)
    self.omega_squared(aov_table_sqr)

    # TODO: filtering by a minimum omega_sq effect size could be added here.
    aov_table_sqr.drop(['sum_sq', 'df', 'F', 'PR(>F)'], inplace=True, axis=1)

    return aov_table, aov_table_sqr
def run(self, data, knobs):
    """Validate the input and store the first two samples on the instance.

    Args:
        data: Indexable collection of samples. At least two are required;
            any beyond the second are ignored with a warning.
        knobs: Not used by this method.

    Returns:
        ``True`` when ``self.y1`` and ``self.y2`` were populated, ``False``
        (after reporting an error) when fewer than two samples were given.
    """
    sample_count = len(data)

    if sample_count < 2:
        error("Cannot run " + self.name + " on less than two samples.")
        return False

    if sample_count > 2:
        warn("Cannot run " + self.name + " on more than two samples.")
        warn("Comparing only the first two samples.")

    # Each sample is copied into a fresh list.
    self.y1 = list(data[0])
    self.y2 = list(data[1])
    return True
def run(self, data, knobs):
    """Run the equal-variance test on the samples prepared by the parent.

    The parent ``run`` is invoked first; this method relies on it having
    populated ``self.y``. The statistic and p-value come from
    ``self.get_statistic_and_pvalue``.

    Args:
        data: Samples, forwarded unchanged to the parent ``run``.
        knobs: Knob settings, forwarded unchanged to the parent ``run``.

    Returns:
        A dict with the keys ``statistic``, ``pvalue``, ``alpha`` and
        ``not_equal_variance`` (``True`` when ``pvalue <= alpha``), or
        ``None`` when the parent ``run`` reports failure.
    """
    parent_ok = super(EqualVarianceTest, self).run(data, knobs)
    if not parent_ok:
        error("Aborting analysis.")
        return None

    statistic, pvalue = self.get_statistic_and_pvalue(self.y)
    rejected = bool(pvalue <= self.alpha)

    return {
        "statistic": statistic,
        "pvalue": pvalue,
        "alpha": self.alpha,
        "not_equal_variance": rejected,
    }
def run(self, data, knobs):
    """Run the different-distributions test on the prepared samples.

    The parent ``run`` is invoked first; this method relies on it having
    populated ``self.y``. The statistic and p-value come from
    ``self.get_statistic_and_pvalue``.

    Args:
        data: Samples, forwarded unchanged to the parent ``run``.
        knobs: Knob settings, forwarded unchanged to the parent ``run``.

    Returns:
        A dict with the keys ``statistic``, ``pvalue``, ``alpha`` and
        ``different_distributions`` (``True`` when ``pvalue <= alpha``), or
        ``None`` when the parent ``run`` reports failure.
    """
    parent_ok = super(DifferentDistributionsTest, self).run(data, knobs)
    if not parent_ok:
        error("Aborting analysis.")
        return None

    statistic, pvalue = self.get_statistic_and_pvalue(self.y)
    rejected = bool(pvalue <= self.alpha)

    return {
        "statistic": statistic,
        "pvalue": pvalue,
        "alpha": self.alpha,
        "different_distributions": rejected,
    }
def setup_user_database():
    """Create the user database instance from ``user_db_config.json``.

    The configuration is read from a path relative to the current working
    directory. On success the created instance is stored in
    ``UserDatabase.DB``. If loading raises ``ValueError`` or the setup
    raises ``KeyError``, an error is reported and the process exits with
    status 0.
    """
    config_path = './oeda/databases/user_db_config.json'

    with open(config_path) as json_data_file:
        try:
            config_data = load(json_data_file)
            db_type = config_data['db_type']
            host = config_data['host']
            port = config_data['port']
            UserDatabase.DB = create_instance_for_users(
                db_type, host, port, config_data)
        except ValueError:
            error(
                "> You need to specify the user database configuration in databases/user_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/user_db_config.json properly"
            )
            exit(0)
def setup_experiment_database(db_type, host, port):
    """Create the experiment database instance and store it globally.

    The configuration is read from ``experiment_db_config.json`` using a
    path relative to the current working directory. On success the created
    instance is stored in ``ExperimentDatabase.DB``.

    Args:
        db_type: Forwarded to ``create_instance_for_experiments``.
        host: Forwarded to ``create_instance_for_experiments``.
        port: Forwarded to ``create_instance_for_experiments``.

    Raises:
        ConnectionError: Re-raised unchanged when raised during setup.

    A ``ValueError`` or ``KeyError`` during loading/setup is reported
    through ``error`` and the process then exits with status 0.
    """
    with open('./oeda/databases/experiment_db_config.json') as json_data_file:
        try:
            config_data = load(json_data_file)
            ExperimentDatabase.DB = create_instance_for_experiments(
                db_type, host, port, config_data)
        except ValueError:
            # This function configures the experiment database, so the
            # message names it rather than the user database.
            error(
                "> You need to specify the experiment database configuration in databases/experiment_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/experiment_db_config.json properly"
            )
            exit(0)
        except ConnectionError as conn_err:
            raise conn_err
def setup_experiment_database(db_type, host, port, for_tests=False):
    """Create the experiment database instance and store it globally.

    The configuration is read from ``databases/experiment_db_config.json``
    located next to the parent directory of this module. The created
    instance is stored in ``ExperimentDatabase.db``, or in
    ``TestDatabase.db`` when ``for_tests`` is true.

    Args:
        db_type: Forwarded to ``create_db_instance_for_experiments``.
        host: Forwarded to ``create_db_instance_for_experiments``.
        port: Forwarded to ``create_db_instance_for_experiments``.
        for_tests: When true, "_test" is appended to the configured index
            name and the instance is stored in ``TestDatabase.db``.

    Raises:
        Exception: Whatever ``open`` raises when the configuration file
            cannot be opened (re-raised after reporting an error).
        ConnectionError: Re-raised unchanged when raised during setup.

    A ``ValueError`` or ``KeyError`` during loading/setup is reported
    through ``error`` and the process then exits with status 0.
    """
    current_directory = os.path.dirname(__file__)
    parent_directory = os.path.split(current_directory)[0]
    file_path = os.path.join(parent_directory, 'databases', 'experiment_db_config.json')

    # Only a failure to open the file is reported as a missing configuration
    # file. Errors raised later (e.g. a re-raised ConnectionError) still
    # propagate, but are no longer mislabelled with that message.
    try:
        json_data_file = open(file_path)
    except Exception:
        error(
            "> You need to specify a 'experiment_db_config.json' file in the databases folder. You can copy the template in that folder!"
        )
        raise

    with json_data_file:
        try:
            config_data = load(json_data_file)
            if for_tests:
                # Use a separate index for tests.
                config_data["index"]["name"] += str("_test")
                TestDatabase.db = create_db_instance_for_experiments(
                    db_type, host, port, config_data)
            else:
                ExperimentDatabase.db = create_db_instance_for_experiments(
                    db_type, host, port, config_data)
        except ValueError as ve:
            print(ve)
            # This function configures the experiment database, so the
            # message names it rather than the user database.
            error(
                "> You need to specify the experiment database configuration in databases/experiment_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/experiment_db_config.json properly"
            )
            exit(0)
        except ConnectionError:
            raise
def setup_user_database():
    """Create the user database instance from ``user_db_config.json``.

    The configuration is read from ``databases/user_db_config.json`` located
    next to the parent directory of this module. On success the created
    instance is stored in ``UserDatabase.db``. Every failure is reported
    through ``error`` and ends the process with status 0.
    """
    current_directory = os.path.dirname(__file__)
    parent_directory = os.path.split(current_directory)[0]
    file_path = os.path.join(parent_directory, 'databases', 'user_db_config.json')

    try:
        with open(file_path) as json_data_file:
            try:
                config_data = load(json_data_file)
                user_db = create_db_instance_for_users(config_data)
                UserDatabase.db = user_db
            except ValueError:
                error(
                    "> You need to specify the user database configuration in databases/user_db_config.json"
                )
                exit(0)
            except KeyError:
                error(
                    "> You need to specify 'db_type', 'host', 'port' values in databases/user_db_config.json properly"
                )
                exit(0)
    except Exception:
        # Deliberately not a bare "except:": the exit(0) calls above raise
        # SystemExit, which a bare except would catch and report a second
        # time as a missing configuration file.
        error(
            "> You need to specify a 'user_db_config.json' file in the databases folder. You can copy the templates in that folder!"
        )
        exit(0)
def run(self, data, knobs):
    """Compare the averages of the two samples prepared by the parent.

    The parent ``run`` is invoked first; this method relies on it having
    populated ``self.y1`` and ``self.y2``. The samples are compared with
    ``ttest_ind(..., equal_var=False)``, i.e. without assuming equal
    variances, and Cohen's d is reported as the effect size.

    Args:
        data: Samples, forwarded unchanged to the parent ``run``.
        knobs: Knob settings, forwarded unchanged to the parent ``run``.

    Returns:
        A dict with the keys ``statistic``, ``pvalue``, ``alpha``,
        ``different_averages`` (``True`` when ``pvalue <= alpha``),
        ``mean_diff`` (mean of y1 minus mean of y2) and ``effect_size``,
        or ``None`` when the parent ``run`` reports failure.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; the enclosing class name is not visible here.
    parent_ok = super(self.__class__, self).run(data, knobs)
    if not parent_ok:
        error("Aborting analysis.")
        return None

    first, second = self.y1, self.y2
    statistic, pvalue = ttest_ind(first, second, equal_var=False)
    mean_diff = mean(first) - mean(second)

    # Cohen's d: mean difference divided by the pooled standard deviation.
    pooled_std = sqrt((var(first) + var(second)) / 2)

    return {
        "statistic": statistic,
        "pvalue": pvalue,
        "alpha": self.alpha,
        "different_averages": bool(pvalue <= self.alpha),
        "mean_diff": mean_diff,
        "effect_size": mean_diff / pooled_std,
    }