# NOTE(review): whitespace-mangled chunk reconstructed; `information_raw`, `chamber`
# and `list_information` are bound by an enclosing loop that precedes this chunk,
# so the indentation below is inferred -- confirm against the original file.

# Keep only the columns of interest and tag each row with the current `chamber` value.
information = information_raw[COLUMNS_TO_TAKE].rename(columns=COLUMNS_TO_TAKE)
information["chamber"] = chamber
list_information.append(information)

# Per-sample metadata used to locate the gif/picture assets in storage.
columns_for_storage = ["id", "Sex", "age_category", "aging_rate", "Gif", "Picture"]
information_for_storage = information_raw[columns_for_storage].set_index("id")
# The stored paths end with the file name; keep only that last component.
information_for_storage["name_gif"] = information_for_storage["Gif"].apply(lambda path: path.split("/")[-1])
information_for_storage["name_picture"] = information_for_storage["Picture"].apply(
    lambda path: path.split("/")[-1]
)

# Build, for every sample, the legacy asset key and its new key under
# feature_importances/videos/<chamber>_chambers/<Sex>/<age_category>/<aging_rate>.
# The actual copies are commented out (presumably already performed once).
for id_to_store in information_for_storage.drop(columns=["Gif", "Picture"]).index:
    old_key_gif = f"page12_AttentionMapsVideos/gif/{information_for_storage.loc[id_to_store, 'name_gif']}"
    new_key_gif = f"feature_importances/videos/{chamber}_chambers/{information_for_storage.loc[id_to_store, 'Sex']}/{information_for_storage.loc[id_to_store, 'age_category']}/{information_for_storage.loc[id_to_store, 'aging_rate']}.gif"
    # copy_file(old_key_gif, new_key_gif)
    old_key_jpg = f"page12_AttentionMapsVideos/img/{information_for_storage.loc[id_to_store, 'name_picture']}"
    new_key_jpg = f"feature_importances/videos/{chamber}_chambers/{information_for_storage.loc[id_to_store, 'Sex']}/{information_for_storage.loc[id_to_store, 'age_category']}/{information_for_storage.loc[id_to_store, 'aging_rate']}.jpg"
    # copy_file(old_key_jpg, new_key_jpg)

# After the enclosing loop: persist the concatenated information and upload it.
pd.concat(list_information).reset_index(drop=True).to_feather(
    "all_data/feature_importances/videos/information.feather"
)
upload_file(
    "all_data/feature_importances/videos/information.feather",
    "feature_importances/videos/information.feather"
)
from dash_website.utils.aws_loader import load_csv, upload_file

if __name__ == "__main__":
    # Build the "squeezed dimensions" lookup table from the best-model
    # age-prediction performances, then export it locally and to storage.
    raw_performances = load_csv(
        f"page2_predictions/Performances/PERFORMANCES_bestmodels_alphabetical_instances_Age_test.csv"
    )
    column_mapping = {
        "organ": "dimension",
        "view": "subdimension",
        "R-Squared_all": "r2",
        "R-Squared_sd_all": "r2_std",
    }
    squeezed_dimensions = (
        raw_performances[["organ", "view", "R-Squared_all", "R-Squared_sd_all"]]
        .rename(columns=column_mapping)
        .replace({"ImmuneSystem": "BloodCells"})
        .set_index("dimension")
    )

    # These two dimensions get a wildcard subdimension.
    for wildcard_dimension in ("Lungs", "Hearing"):
        squeezed_dimensions.loc[wildcard_dimension, "subdimension"] = "*"
    squeezed_dimensions.reset_index(inplace=True)

    # Squeezed name = dimension + subdimension, with "*" contributing nothing.
    subdimension_suffix = squeezed_dimensions["subdimension"].replace("*", "")
    squeezed_dimensions["squeezed_dimensions"] = squeezed_dimensions["dimension"] + subdimension_suffix

    local_path = "all_data/xwas/squeezed_dimensions_participant_and_time_of_examination.feather"
    squeezed_dimensions.to_feather(local_path)
    upload_file(
        local_path,
        "xwas/squeezed_dimensions_participant_and_time_of_examination.feather",
    )
# NOTE(review): chunk begins mid-statement -- the opening of the first assignment
# (presumably `averages_correlations.loc[("MainDimensions", category), (subset_method,
# correlation_type, "std"`) precedes this chunk; its tail is kept verbatim below.
# `category`, `subset_method` and `correlation_type` are loop variables bound
# before this chunk, so the flat indentation here is inferred.
)] = correlations_category.loc[PAIRS_MAIN_DIMENSIONS].std()
# Mean/std of correlations over all subdimension pairs, for this
# (category, subset_method, correlation_type) combination.
averages_correlations.loc[("SubDimensions", category), (
    subset_method, correlation_type, "mean"
)] = correlations_category.loc[PAIRS_SUBDIMENSIONS].mean()
averages_correlations.loc[("SubDimensions", category), (
    subset_method, correlation_type, "std"
)] = correlations_category.loc[PAIRS_SUBDIMENSIONS].std()
# Per-dimension averages, excluding the dimension itself and its configured exclusions.
for dimension in DIMENSIONS:
    correlations_independant = correlations_category.loc[dimension].drop(
        index=([dimension] + DIMENSIONS_TO_EXCLUDE[dimension]))
    averages_correlations.loc[(dimension, category), (
        subset_method, correlation_type,
        "mean")] = correlations_independant.mean()
    averages_correlations.loc[(dimension, category), (
        subset_method, correlation_type,
        "std")] = correlations_independant.std()
# Stringify the (tuple) column labels -- presumably required by the feather writer.
averages_correlations.columns = map(str, averages_correlations.columns.tolist())
averages_correlations.reset_index().to_feather(
    "all_data/xwas/univariate_correlations/averages_correlations.feather")
upload_file(
    "all_data/xwas/univariate_correlations/averages_correlations.feather",
    "xwas/univariate_correlations/averages_correlations.feather",
)
# NOTE(review): whitespace-mangled chunk reconstructed; `correlations`,
# SQUEEZED_DIMENSIONS, RENAME_DIMENSIONS, DIMENSIONS and
# MAIN_CATEGORIES_TO_CATEGORIES are defined before this chunk.
correlations.set_index(["dimension_1", "subdimension_1"], inplace=True)
# One feather per dimension: all correlations whose first member is that dimension.
for squeezed_dimension in DIMENSIONS:
    dimension_1, subdimension_1 = SQUEEZED_DIMENSIONS.loc[
        squeezed_dimension, ["dimension", "subdimension"]]
    correlations.loc[(
        dimension_1, subdimension_1
    )].reset_index(drop=True).rename(columns={
        "dimension_2": "dimension",
        "subdimension_2": "subdimension"
    }).to_feather(
        f"all_data/xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather"
    )
    upload_file(
        f"all_data/xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather",
        f"xwas/univariate_correlations/correlations/dimensions/correlations_{RENAME_DIMENSIONS.get(squeezed_dimension, squeezed_dimension)}.feather",
    )
# Re-key by category for the per-category processing below.
correlations.reset_index(inplace=True)
correlations.set_index("category", inplace=True)
SQUEEZED_DIMENSIONS.set_index(["dimension", "subdimension"], inplace=True)
# Iterate over every category plus the "All_<main category>" aggregates.
for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
    f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
]:
    correlations_category = correlations.loc[category].reset_index(drop=True)
    for index_dimension in [1, 2]:
        # NOTE(review): chunk is truncated mid-statement below.
        correlations_category.set_index([
f"xwas/univariate_correlations/correlations/correlations.feather" ).set_index("category") for category in tqdm(EVERY_CATEGORIES): correlations = (every_correlation.loc[category].reset_index( drop=True).rename( columns={ "dimension_1": "dimensions_1", "dimension_2": "dimensions_2" })) for idx_dimension in ["1", "2"]: correlations.set_index(f"dimensions_{idx_dimension}", inplace=True) correlations[f"dimension_{idx_dimension}"] = squeezed_dimensions[ "dimension"] correlations[ f"subdimension_{idx_dimension}"] = squeezed_dimensions[ "subdimension"] correlations[f"r2_{idx_dimension}"] = squeezed_dimensions["r2"] correlations[f"r2_std_{idx_dimension}"] = squeezed_dimensions[ "r2_std"] correlations.reset_index(drop=True, inplace=True) correlations.to_feather( f"all_data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather" ) upload_file( f"all_data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather", f"xwas/univariate_correlations/correlations/categories/correlations_{category}.feather", )
if "medical_diagnoses_" in category: features = load_csv( f"page18_MultivariateXWASFeatures/FeatureImp_{DICT_TO_FORMER_CATEGORIES.get(category, category)}_{DICT_TO_FORMER_DIMENSIONS.get(dimension, dimension)}_{DICT_TO_FORMER_ALGORITHM.get(algorithm, algorithm)}.csv" ).rename(columns={ "features": "variable", "weight": "feature_importance" }) else: features = load_csv( f"page18_MultivariateXWASFeatures/FeatureImp_Clusters_{DICT_TO_FORMER_CATEGORIES.get(category, category)}_{DICT_TO_FORMER_DIMENSIONS.get(dimension, dimension)}_{DICT_TO_FORMER_ALGORITHM.get(algorithm, algorithm)}.csv" ).rename(columns={ "features": "variable", "weight": "feature_importance" }) features["variable"] = features["variable"].apply( lambda variable: variable.split(".0")[0]) features["category"] = category features["dimension"] = dimension features["algorithm"] = algorithm list_features.append(features) pd.concat(list_features).reset_index(drop=True).to_feather( "all_data/xwas/multivariate_feature_importances/feature_importances.feather" ) upload_file( "all_data/xwas/multivariate_feature_importances/feature_importances.feather", "xwas/multivariate_feature_importances/feature_importances.feather", )
# Merge with new scores for algorithm in CAMEL_TO_SNAKE.keys(): missing_scores = pd.read_csv( f"all_data/page7_MultivariateXWASResults/Scores/ScoresMissing_{algorithm}_test.csv" ) old_scores = pd.read_csv( f"all_data/page7_MultivariateXWASResults/Scores/Old_Scores_{algorithm}_test.csv" ) pd.concat( (missing_scores, old_scores), ignore_index=True ).drop(columns="Unnamed: 0").to_csv( f"all_data/page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv" ) upload_file( f"all_data/page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv", f"page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv", ) list_scores = [] for algorithm in CAMEL_TO_SNAKE.keys(): scores = load_csv( f"page7_MultivariateXWASResults/Scores/Scores_{algorithm}_test.csv", index_col=0).drop(columns="subset") scores.rename(columns={ "env_dataset": "category", "organ": "dimension" }, inplace=True) scores_cleaned_dimension = scores.set_index("dimension").rename(
from dash_website.utils.aws_loader import load_feather, upload_file

if __name__ == "__main__":
    # Lookup table mapping each (dimension, subdimension) pair to its squeezed name.
    squeezed_dimensions = load_feather(
        "xwas/squeezed_dimensions_participant_and_time_of_examination.feather"
    ).set_index(["dimension", "subdimension"])

    wanted_columns = ["dimension_1", "subdimension_1", "dimension_2", "subdimension_2", "correlation"]
    phenotypic = load_feather(
        "correlation_between_accelerated_aging_dimensions/custom_dimensions_all_samples_when_possible_otherwise_average.feather"
    )[wanted_columns]

    # Replace each side's (dimension, subdimension) pair by its squeezed name.
    for side in (1, 2):
        phenotypic.set_index([f"dimension_{side}", f"subdimension_{side}"], inplace=True)
        phenotypic[f"squeezed_dimension_{side}"] = squeezed_dimensions["squeezed_dimensions"]
        phenotypic.reset_index(drop=True, inplace=True)

    output_path = "all_data/xwas/univariate_correlations/phenotypic.feather"
    phenotypic.to_feather(output_path)
    upload_file(output_path, "xwas/univariate_correlations/phenotypic.feather")
"1DCNN": "1dcnn", "3DCNN": "3dcnn", } if __name__ == "__main__": for sample_definition in ["instances", "eids"]: scores = load_csv( f"page2_predictions/Performances/PERFORMANCES_withEnsembles_withCI_alphabetical_{sample_definition}_Age_test.csv" )[COLUMNS_TO_TAKE].rename(columns=COLUMNS_TO_TAKE) for metric in ["r2", "rmse", "c_index", "c_index_difference"]: scores[metric] = scores[f"{metric}_and_std"].str.split( "+", expand=True)[0].astype(np.float32) scores[f"{metric}_std"] = (scores[f"{metric}_and_std"].str.split( "+", expand=True)[1].str.split("-", expand=True)[1].astype(np.float32)) scores.drop(columns=f"{metric}_and_std", inplace=True) scores.loc[(scores["dimension"] == "Musculoskeletal") & (scores["sub_subdimension"] == "MRI"), "sub_subdimension"] = "DXA" scores.replace(DICT_TO_CHANGE_DIMENSIONS).to_feather( f"all_data/age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather" ) upload_file( f"all_data/age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather", f"age_prediction_performances/scores_{SAMPLE_DEFINITION_NAMING[sample_definition]}.feather", )
"number_features")] = (indexed_features.loc[( squeezed_dimension_1, category, algorithm), "feature_importance"].drop( VARIABLES_TO_DROP).shape[0]) correlations.reset_index(inplace=True) for index_dimension in [1, 2]: correlations.set_index(f"squeezed_dimension_{index_dimension}", inplace=True) correlations[f"dimension_{index_dimension}"] = SQUEEZED_DIMENSIONS[ "dimension"] correlations[f"subdimension_{index_dimension}"] = SQUEEZED_DIMENSIONS[ "subdimension"] correlations.reset_index(drop=True) correlations.set_index([ "dimension_1", "subdimension_1", "dimension_2", "subdimension_2", "category" ], inplace=True) correlations.columns = map(str, correlations.columns.tolist()) correlations.reset_index().to_feather( "all_data/xwas/multivariate_correlations/correlations/correlations.feather" ) upload_file( "all_data/xwas/multivariate_correlations/correlations/correlations.feather", "xwas/multivariate_correlations/correlations/correlations.feather", )