information = information_raw[COLUMNS_TO_TAKE].rename(columns=COLUMNS_TO_TAKE)
        information["chamber"] = chamber

        list_information.append(information)

        columns_for_storage = ["id", "Sex", "age_category", "aging_rate", "Gif", "Picture"]
        information_for_storage = information_raw[columns_for_storage].set_index("id")

        information_for_storage["name_gif"] = information_for_storage["Gif"].apply(lambda path: path.split("/")[-1])
        information_for_storage["name_picture"] = information_for_storage["Picture"].apply(
            lambda path: path.split("/")[-1]
        )

        for id_to_store in information_for_storage.drop(columns=["Gif", "Picture"]).index:
            old_key_gif = f"page12_AttentionMapsVideos/gif/{information_for_storage.loc[id_to_store, 'name_gif']}"
            new_key_gif = f"feature_importances/videos/{chamber}_chambers/{information_for_storage.loc[id_to_store, 'Sex']}/{information_for_storage.loc[id_to_store, 'age_category']}/{information_for_storage.loc[id_to_store, 'aging_rate']}.gif"

            # copy_file(old_key_gif, new_key_gif)

            old_key_jpg = f"page12_AttentionMapsVideos/img/{information_for_storage.loc[id_to_store, 'name_picture']}"
            new_key_jpg = f"feature_importances/videos/{chamber}_chambers/{information_for_storage.loc[id_to_store, 'Sex']}/{information_for_storage.loc[id_to_store, 'age_category']}/{information_for_storage.loc[id_to_store, 'aging_rate']}.jpg"

            # copy_file(old_key_jpg, new_key_jpg)

    pd.concat(list_information).reset_index(drop=True).to_feather(
        "all_data/feature_importances/videos/information.feather"
    )
    upload_file(
        "all_data/feature_importances/videos/information.feather", "feature_importances/videos/information.feather"
    )
from dash_website.utils.aws_loader import load_csv, upload_file

if __name__ == "__main__":
    # Build the mapping from (dimension, subdimension) to a single "squeezed"
    # dimension label, carrying the age-prediction R² and its std along.
    # NOTE: the f-prefix on the CSV path was removed — it had no placeholders.
    squeezed_dimensions = (
        load_csv(
            "page2_predictions/Performances/PERFORMANCES_bestmodels_alphabetical_instances_Age_test.csv"
        )[["organ", "view", "R-Squared_all", "R-Squared_sd_all"]]
        .rename(
            columns={
                "organ": "dimension",
                "view": "subdimension",
                "R-Squared_all": "r2",
                "R-Squared_sd_all": "r2_std",
            }
        )
        .replace({"ImmuneSystem": "BloodCells"})
        .set_index("dimension")
    )

    # Lungs and Hearing get the wildcard subdimension "*", which is treated as
    # empty when concatenating below, so their squeezed name is the dimension alone.
    squeezed_dimensions.loc["Lungs", "subdimension"] = "*"
    squeezed_dimensions.loc["Hearing", "subdimension"] = "*"
    squeezed_dimensions.reset_index(inplace=True)

    squeezed_dimensions["squeezed_dimensions"] = (
        squeezed_dimensions["dimension"]
        + squeezed_dimensions["subdimension"].replace("*", "")
    )

    # Persist locally, then mirror to remote storage.
    squeezed_dimensions.to_feather(
        "all_data/xwas/squeezed_dimensions_participant_and_time_of_examination.feather"
    )
    upload_file(
        "all_data/xwas/squeezed_dimensions_participant_and_time_of_examination.feather",
        "xwas/squeezed_dimensions_participant_and_time_of_examination.feather",
    )
                )] = correlations_category.loc[PAIRS_MAIN_DIMENSIONS].std()

                averages_correlations.loc[("SubDimensions", category), (
                    subset_method, correlation_type, "mean"
                )] = correlations_category.loc[PAIRS_SUBDIMENSIONS].mean()
                averages_correlations.loc[("SubDimensions", category), (
                    subset_method, correlation_type, "std"
                )] = correlations_category.loc[PAIRS_SUBDIMENSIONS].std()

                for dimension in DIMENSIONS:
                    correlations_independant = correlations_category.loc[
                        dimension].drop(
                            index=([dimension] +
                                   DIMENSIONS_TO_EXCLUDE[dimension]))

                    averages_correlations.loc[(dimension, category), (
                        subset_method, correlation_type,
                        "mean")] = correlations_independant.mean()
                    averages_correlations.loc[(dimension, category), (
                        subset_method, correlation_type,
                        "std")] = correlations_independant.std()

    averages_correlations.columns = map(str,
                                        averages_correlations.columns.tolist())
    averages_correlations.reset_index().to_feather(
        "all_data/xwas/univariate_correlations/averages_correlations.feather")
    upload_file(
        "all_data/xwas/univariate_correlations/averages_correlations.feather",
        "xwas/univariate_correlations/averages_correlations.feather",
    )
Exemple #4
0
    correlations.set_index(["dimension_1", "subdimension_1"], inplace=True)

    for squeezed_dimension in DIMENSIONS:
        dimension_1, subdimension_1 = SQUEEZED_DIMENSIONS.loc[
            squeezed_dimension, ["dimension", "subdimension"]]
        correlations.loc[(
            dimension_1, subdimension_1
        )].reset_index(drop=True).rename(columns={
            "dimension_2": "dimension",
            "subdimension_2": "subdimension"
        }).to_feather(
            f"all_data/xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather"
        )
        upload_file(
            f"all_data/xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather",
            f"xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather",
        )

    correlations.reset_index(inplace=True)
    correlations.set_index("category", inplace=True)
    SQUEEZED_DIMENSIONS.set_index(["dimension", "subdimension"], inplace=True)

    for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
            f"All_{main_category}"
            for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
    ]:
        correlations_category = correlations.loc[category].reset_index(
            drop=True)

        for index_dimension in [1, 2]:
            correlations_category.set_index([
        f"xwas/univariate_correlations/correlations/correlations.feather"
    ).set_index("category")

    for category in tqdm(EVERY_CATEGORIES):
        correlations = (every_correlation.loc[category].reset_index(
            drop=True).rename(
                columns={
                    "dimension_1": "dimensions_1",
                    "dimension_2": "dimensions_2"
                }))

        for idx_dimension in ["1", "2"]:
            correlations.set_index(f"dimensions_{idx_dimension}", inplace=True)
            correlations[f"dimension_{idx_dimension}"] = squeezed_dimensions[
                "dimension"]
            correlations[
                f"subdimension_{idx_dimension}"] = squeezed_dimensions[
                    "subdimension"]
            correlations[f"r2_{idx_dimension}"] = squeezed_dimensions["r2"]
            correlations[f"r2_std_{idx_dimension}"] = squeezed_dimensions[
                "r2_std"]
            correlations.reset_index(drop=True, inplace=True)

        correlations.to_feather(
            f"all_data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather"
        )
        upload_file(
            f"all_data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather",
            f"xwas/univariate_correlations/correlations/categories/correlations_{category}.feather",
        )
                if "medical_diagnoses_" in category:
                    features = load_csv(
                        f"page18_MultivariateXWASFeatures/FeatureImp_{DICT_TO_FORMER_CATEGORIES.get(category, category)}_{DICT_TO_FORMER_DIMENSIONS.get(dimension, dimension)}_{DICT_TO_FORMER_ALGORITHM.get(algorithm, algorithm)}.csv"
                    ).rename(columns={
                        "features": "variable",
                        "weight": "feature_importance"
                    })
                else:
                    features = load_csv(
                        f"page18_MultivariateXWASFeatures/FeatureImp_Clusters_{DICT_TO_FORMER_CATEGORIES.get(category, category)}_{DICT_TO_FORMER_DIMENSIONS.get(dimension, dimension)}_{DICT_TO_FORMER_ALGORITHM.get(algorithm, algorithm)}.csv"
                    ).rename(columns={
                        "features": "variable",
                        "weight": "feature_importance"
                    })

                features["variable"] = features["variable"].apply(
                    lambda variable: variable.split(".0")[0])
                features["category"] = category
                features["dimension"] = dimension
                features["algorithm"] = algorithm

                list_features.append(features)

    pd.concat(list_features).reset_index(drop=True).to_feather(
        "all_data/xwas/multivariate_feature_importances/feature_importances.feather"
    )
    upload_file(
        "all_data/xwas/multivariate_feature_importances/feature_importances.feather",
        "xwas/multivariate_feature_importances/feature_importances.feather",
    )
Exemple #7
0
    # Merge with new scores
    for algorithm in CAMEL_TO_SNAKE.keys():
        missing_scores = pd.read_csv(
            f"all_data/page7_MultivariateXWASResults/Scores/ScoresMissing_{algorithm}_test.csv"
        )
        old_scores = pd.read_csv(
            f"all_data/page7_MultivariateXWASResults/Scores/Old_Scores_{algorithm}_test.csv"
        )

        pd.concat(
            (missing_scores, old_scores), ignore_index=True
        ).drop(columns="Unnamed: 0").to_csv(
            f"all_data/page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv"
        )
        upload_file(
            f"all_data/page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv",
            f"page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv",
        )

    list_scores = []

    for algorithm in CAMEL_TO_SNAKE.keys():
        scores = load_csv(
            f"page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv",
            index_col=0).drop(columns="subset")
        scores.rename(columns={
            "env_dataset": "category",
            "organ": "dimension"
        },
                      inplace=True)

        scores_cleaned_dimension = scores.set_index("dimension").rename(
Exemple #8
0
from dash_website.utils.aws_loader import load_feather, upload_file

if __name__ == "__main__":
    # Squeezed-dimension lookup table, keyed by (dimension, subdimension).
    squeezed_dimensions = load_feather(
        "xwas/squeezed_dimensions_participant_and_time_of_examination.feather"
    ).set_index(["dimension", "subdimension"])

    # Pairwise correlations between accelerated-aging dimensions.
    phenotypic = load_feather(
        "correlation_between_accelerated_aging_dimensions/custom_dimensions_all_samples_when_possible_otherwise_average.feather"
    )[["dimension_1", "subdimension_1", "dimension_2", "subdimension_2", "correlation"]]

    # Annotate both sides of each pair with its squeezed dimension name by
    # aligning on the (dimension, subdimension) index.
    for side in (1, 2):
        phenotypic = phenotypic.set_index(
            [f"dimension_{side}", f"subdimension_{side}"]
        )
        phenotypic[f"squeezed_dimension_{side}"] = squeezed_dimensions[
            "squeezed_dimensions"
        ]
        phenotypic = phenotypic.reset_index(drop=True)

    # Persist locally, then mirror to remote storage.
    phenotypic.to_feather(
        "all_data/xwas/univariate_correlations/phenotypic.feather")
    upload_file("all_data/xwas/univariate_correlations/phenotypic.feather",
                "xwas/univariate_correlations/phenotypic.feather")
Exemple #9
0
    "1DCNN": "1dcnn",
    "3DCNN": "3dcnn",
}

if __name__ == "__main__":
    for sample_definition in ("instances", "eids"):
        # Performance scores (with confidence intervals) for this sample definition.
        scores = load_csv(
            f"page2_predictions/Performances/PERFORMANCES_withEnsembles_withCI_alphabetical_{sample_definition}_Age_test.csv"
        )[COLUMNS_TO_TAKE].rename(columns=COLUMNS_TO_TAKE)

        # Each "<metric>_and_std" column encodes "value+-std"; split it once,
        # take the value from the left of "+" and the std from the right of
        # "-", then drop the raw combined column.
        for name in ("r2", "rmse", "c_index", "c_index_difference"):
            value_and_std = scores[f"{name}_and_std"].str.split("+", expand=True)
            scores[name] = value_and_std[0].astype(np.float32)
            scores[f"{name}_std"] = (
                value_and_std[1].str.split("-", expand=True)[1].astype(np.float32)
            )
            scores.drop(columns=f"{name}_and_std", inplace=True)

        # The Musculoskeletal "MRI" sub-subdimension is relabeled "DXA".
        is_musculoskeletal_mri = (scores["dimension"] == "Musculoskeletal") & (
            scores["sub_subdimension"] == "MRI"
        )
        scores.loc[is_musculoskeletal_mri, "sub_subdimension"] = "DXA"

        # Persist locally, then mirror to remote storage.
        scores.replace(DICT_TO_CHANGE_DIMENSIONS).to_feather(
            f"all_data/age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather"
        )
        upload_file(
            f"all_data/age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather",
            f"age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather",
        )
Exemple #10
0
                              "number_features")] = (indexed_features.loc[(
                                  squeezed_dimension_1, category,
                                  algorithm), "feature_importance"].drop(
                                      VARIABLES_TO_DROP).shape[0])

    correlations.reset_index(inplace=True)

    for index_dimension in [1, 2]:
        correlations.set_index(f"squeezed_dimension_{index_dimension}",
                               inplace=True)
        correlations[f"dimension_{index_dimension}"] = SQUEEZED_DIMENSIONS[
            "dimension"]
        correlations[f"subdimension_{index_dimension}"] = SQUEEZED_DIMENSIONS[
            "subdimension"]
        correlations.reset_index(drop=True)

    correlations.set_index([
        "dimension_1", "subdimension_1", "dimension_2", "subdimension_2",
        "category"
    ],
                           inplace=True)
    correlations.columns = map(str, correlations.columns.tolist())

    correlations.reset_index().to_feather(
        "all_data/xwas/multivariate_correlations/correlations/correlations.feather"
    )
    upload_file(
        "all_data/xwas/multivariate_correlations/correlations/correlations.feather",
        "xwas/multivariate_correlations/correlations/correlations.feather",
    )