def add_paired_property(dataset, combined_data: CombinedData, study_type: str, design: Dict[str, str] = None):
    """Record on ``combined_data`` whether its two variables form a paired design.

    ``combined_data.properties[paired]`` is always reset to ``False`` first. It
    is set to ``True`` only when all of the following hold:

    * ``combined_data`` is a ``BivariateData``;
    * exactly one explanatory and one response variable are returned by
      ``combined_data.get_vars`` for the roles selected by ``study_type``;
    * the explanatory variable is categorical and the response is continuous;
    * ``design`` contains ``within_subj`` and that entry equals the
      explanatory variable's ``metadata[name]``.

    Args:
        dataset: Not used by this function.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: Compared against ``experiment_identifier``; any other
            value selects the contributor/outcome roles.
        design: Optional study-design mapping. ``None`` (the default) is
            treated as "no within-subject variable declared".

    Raises:
        AssertionError: If both roles are populated but they do not each hold
            exactly one variable.
    """
    # NOTE(review): the original header comment read "check same sizes are
    # identical"; no size check beyond the single-variable assert is done here.
    combined_data.properties[paired] = False

    # Only BivariateData is inspected; any other type keeps the False default.
    if not isinstance(combined_data, BivariateData):
        return

    if study_type == experiment_identifier:
        x_vars = combined_data.get_vars(iv_identifier)
        y_vars = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        x_vars = combined_data.get_vars(contributor_identifier)
        y_vars = combined_data.get_vars(outcome_identifier)

    if not (x_vars and y_vars):
        return

    # Kept as an assert so the exception type seen by callers is unchanged.
    assert len(x_vars) == len(y_vars) == 1
    x = x_vars[0]
    y = y_vars[0]

    if not (x.is_categorical() and y.is_continuous()):
        return

    # `design` defaults to None; a membership test on None raises TypeError,
    # so a missing design is treated as "not paired" instead of crashing.
    if design is not None and within_subj in design and design[within_subj] == x.metadata[name]:
        combined_data.properties[paired] = True
def add_eq_variance_property(dataset, combined_data: CombinedData, study_type: str):
    """Compute equal-variance results per group and store them on ``combined_data``.

    Explanatory and response variables are taken from ``combined_data.get_vars``
    using the roles selected by ``study_type``. For every (categorical
    explanatory, continuous response) pair, the response column is selected
    from ``dataset`` once per category of the explanatory variable, and the
    resulting groups are passed to ``compute_eq_variance``.

    Results are written to ``combined_data.properties``:

    * ``properties[eq_variance]`` is reset to ``None`` up front;
    * for ``BivariateData`` the result is stored under ``eq_variance``;
    * for ``MultivariateData`` each pair's result is stored under
      ``eq_variance + '::' + <x name> + ':' + <y name>``.

    Args:
        dataset: Object providing ``select(column, where=[...])``.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: Compared against ``experiment_identifier``; any other
            value selects the contributor/outcome roles.

    Raises:
        ValueError: If a qualifying pair exists but ``combined_data`` is
            neither ``BivariateData`` nor ``MultivariateData``.
    """
    if study_type == experiment_identifier:
        xs = combined_data.get_vars(iv_identifier)
        ys = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        xs = combined_data.get_vars(contributor_identifier)
        ys = combined_data.get_vars(outcome_identifier)

    cat_xs = [x for x in xs if x.is_categorical()]
    cont_ys = [y for y in ys if y.is_continuous()]

    combined_data.properties[eq_variance] = None

    if not (cat_xs and cont_ys):
        return

    # Iterate only the variables that passed the type filter; the unfiltered
    # lists may contain variables without usable category metadata.
    for y in cont_ys:
        for x in cat_xs:
            # Build a fresh group list per pair so one pair's samples never
            # leak into the variance test of the next pair.
            grouped_data = [
                dataset.select(
                    y.metadata[name],
                    where=[f"{x.metadata[name]} == '{c}'"])
                for c in x.metadata[categories]
            ]

            if isinstance(combined_data, BivariateData):
                combined_data.properties[eq_variance] = compute_eq_variance(grouped_data)
            elif isinstance(combined_data, MultivariateData):
                pair_key = eq_variance + '::' + x.metadata[name] + ':' + y.metadata[name]
                combined_data.properties[pair_key] = compute_eq_variance(grouped_data)
            else:
                raise ValueError(
                    f"combined_data_data object is neither BivariateData nor MultivariateData: {type(combined_data)}"
                )
def add_categories_normal(dataset, combined_data: CombinedData, study_type: str, design: Dict[str, str] = None):
    """Compute the distribution of each response within each category.

    Explanatory and response variables are taken from ``combined_data.get_vars``
    using the roles selected by ``study_type``. For every (categorical
    explanatory, continuous response) pair, the response column is selected
    from ``dataset`` once per category and passed to ``compute_distribution``.

    ``combined_data.properties[cat_distribution]`` is reset to ``None`` and,
    when at least one qualifying pair exists, replaced by a dict of the form::

        {"<y name>::<x name>": {"<x name>:<category>": <distribution>, ...}, ...}

    Args:
        dataset: Object providing ``select(column, where=[...])``.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: Compared against ``experiment_identifier``; any other
            value selects the contributor/outcome roles.
        design: Not used by this function.
    """
    if study_type == experiment_identifier:
        xs = combined_data.get_vars(iv_identifier)
        ys = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        xs = combined_data.get_vars(contributor_identifier)
        ys = combined_data.get_vars(outcome_identifier)

    cat_xs = [x for x in xs if x.is_categorical()]
    cont_ys = [y for y in ys if y.is_continuous()]

    combined_data.properties[cat_distribution] = None

    if not (cat_xs and cont_ys):
        return

    # One result dict for all pairs: re-creating it inside the loop would
    # discard every pair except the last one.
    distributions = {}
    for y in cont_ys:
        y_name = y.metadata[name]
        for x in cat_xs:
            x_name = x.metadata[name]
            # Each pair gets its own inner dict so entries never alias or
            # overwrite the groups computed for another pair.
            distributions[f"{y_name}::{x_name}"] = {
                f"{x_name}:{c}": compute_distribution(
                    dataset.select(y_name, where=[f"{x_name} == '{c}'"]))
                for c in x.metadata[categories]
            }

    combined_data.properties[cat_distribution] = distributions