Esempio n. 1
0
def add_paired_property(
        dataset,
        combined_data: CombinedData,
        study_type: str,
        design: Dict[str, str] = None):
    """Flag on ``combined_data`` whether its two variables are paired.

    ``combined_data.properties[paired]`` is always written. It starts as
    ``False`` and is switched to ``True`` only when all of the following hold:

    * ``combined_data`` is a ``BivariateData``;
    * exactly one explanatory and one response variable are found;
    * the explanatory variable is categorical and the response is continuous;
    * ``design`` maps ``within_subj`` to the explanatory variable's name.

    Args:
        dataset: Not used by this function.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: When equal to ``experiment_identifier`` the variables are
            looked up by ``iv_identifier`` / ``dv_identifier``; any other
            value uses ``contributor_identifier`` / ``outcome_identifier``.
        design: Optional mapping; only its ``within_subj`` entry is read.
            ``None`` (the default) is treated as "no design information".

    Raises:
        AssertionError: If both variable lists are non-empty but either does
            not contain exactly one variable.
    """
    # NOTE(review): the original signature carried a "check same sizes are
    # identical" reminder; no size check is performed here.
    combined_data.properties[paired] = False

    if not isinstance(combined_data, BivariateData):
        return

    if study_type == experiment_identifier:
        # Just need one variable to be Categorical and another to be
        # Continuous (regardless of role).
        x = combined_data.get_vars(iv_identifier)
        y = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        x = combined_data.get_vars(contributor_identifier)
        y = combined_data.get_vars(outcome_identifier)

    if not (x and y):
        return

    assert (len(x) == len(y) == 1)
    x = x[0]
    y = y[0]

    if not (x.is_categorical() and y.is_continuous()):
        return

    # ``design`` defaults to None; a membership test on None would raise
    # TypeError, so treat a missing design as "not within-subjects".
    if (design is not None and within_subj in design
            and design[within_subj] == x.metadata[name]):
        combined_data.properties[paired] = True
Esempio n. 2
0
def add_eq_variance_property(dataset, combined_data: CombinedData,
                             study_type: str):
    """Store equal-variance results on ``combined_data.properties``.

    For every (categorical explanatory, continuous response) variable pair,
    the response values are selected from ``dataset`` once per category of
    the explanatory variable, and the resulting list of groups is passed to
    ``compute_eq_variance``.

    ``combined_data.properties[eq_variance]`` is always initialised to
    ``None``. Results are then stored as follows:

    * ``BivariateData``: under ``eq_variance``.
    * ``MultivariateData``: under
      ``eq_variance + '::' + <x name> + ':' + <y name>``, one key per pair.

    Args:
        dataset: Object providing ``select(column, where=[...])``.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: When equal to ``experiment_identifier`` the variables are
            looked up by ``iv_identifier`` / ``dv_identifier``; any other
            value uses ``contributor_identifier`` / ``outcome_identifier``.

    Raises:
        ValueError: If a qualifying pair exists but ``combined_data`` is
            neither ``BivariateData`` nor ``MultivariateData``.
    """
    if study_type == experiment_identifier:
        # Just need one variable to be Categorical and another to be
        # Continuous (regardless of role) -- both could be
        # variable_identifier types.
        xs = combined_data.get_vars(iv_identifier)
        ys = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        xs = combined_data.get_vars(contributor_identifier)
        ys = combined_data.get_vars(outcome_identifier)

    cat_xs = [x for x in xs if x.is_categorical()]
    cont_ys = [y for y in ys if y.is_continuous()]

    combined_data.properties[eq_variance] = None

    # Only categorical x / continuous y pairs are meaningful here; iterating
    # the unfiltered lists would read ``categories`` metadata on variables
    # that were never checked to be categorical.
    for y in cont_ys:
        for x in cat_xs:
            # Build the groups afresh for each pair so one pair's samples
            # never leak into another pair's variance computation.
            grouped_data = [
                dataset.select(
                    y.metadata[name],
                    where=[f"{x.metadata[name]} == '{c}'"])
                for c in x.metadata[categories]
            ]

            if isinstance(combined_data, BivariateData):
                # Equal variance
                combined_data.properties[eq_variance] = compute_eq_variance(
                    grouped_data)
            elif isinstance(combined_data, MultivariateData):
                combined_data.properties[
                    eq_variance + '::' + x.metadata[name] + ':' +
                    y.metadata[name]] = compute_eq_variance(grouped_data)
            else:
                raise ValueError(
                    f"combined_data_data object is neither BivariateData nor MultivariateData: {type(combined_data)}"
                )
Esempio n. 3
0
def add_categories_normal(dataset,
                          combined_data: CombinedData,
                          study_type: str,
                          design: Dict[str, str] = None):
    """Store per-category distribution results on ``combined_data.properties``.

    For every (categorical explanatory, continuous response) variable pair,
    the response values are selected from ``dataset`` once per category of
    the explanatory variable and passed to ``compute_distribution``.

    ``combined_data.properties[cat_distribution]`` is ``None`` when there is
    no qualifying pair. Otherwise it is a dict keyed by
    ``<y name> + '::' + <x name>``, whose values are dicts keyed by
    ``"<x name>:<category>"`` holding the ``compute_distribution`` result for
    that category.

    Args:
        dataset: Object providing ``select(column, where=[...])``.
        combined_data: Object whose ``properties`` mapping is updated in place.
        study_type: When equal to ``experiment_identifier`` the variables are
            looked up by ``iv_identifier`` / ``dv_identifier``; any other
            value uses ``contributor_identifier`` / ``outcome_identifier``.
        design: Not used by this function.
    """
    if study_type == experiment_identifier:
        # Just need one variable to be Categorical and another to be
        # Continuous (regardless of role) -- both could be
        # variable_identifier types.
        xs = combined_data.get_vars(iv_identifier)
        ys = combined_data.get_vars(dv_identifier)
    else:  # study_type == observational_identifier
        xs = combined_data.get_vars(contributor_identifier)
        ys = combined_data.get_vars(outcome_identifier)

    cat_xs = [x for x in xs if x.is_categorical()]
    cont_ys = [y for y in ys if y.is_continuous()]

    combined_data.properties[cat_distribution] = None

    if not (cat_xs and cont_ys):
        return

    # Create the result container once, outside the loops, so results for
    # earlier pairs are not discarded when a later pair is processed.
    distributions = {}
    combined_data.properties[cat_distribution] = distributions

    for y in cont_ys:
        y_name = y.metadata[name]
        for x in cat_xs:
            x_name = x.metadata[name]
            # A fresh dict per pair: sharing one dict would make every pair's
            # entry alias the same object and overwrite each other's
            # categories.
            per_category = {}
            for c in x.metadata[categories]:
                data = dataset.select(y_name, where=[f"{x_name} == '{c}'"])
                per_category[f"{x_name}:{c}"] = compute_distribution(data)
            distributions[y_name + '::' + x_name] = per_category