def get_controls_comparison(order, default_category):
    """Build the control card for one side of the comparison tab.

    Args:
        order: Prefix interpolated into every component id and into the
            drop-down label (e.g. to tell the compared sides apart).
        default_category: Value initially selected in the category drop-down.

    Returns:
        A ``dbc.Card`` holding the category drop-down and a hidden ``html.Div``
        with the XWAS type, method and correlation type selectors.
    """
    # Every individual category, followed by one "All_<main category>" entry
    # per key of MAIN_CATEGORIES_TO_CATEGORIES.
    category_options = MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
        f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
    ]

    category_selector = get_drop_down(
        f"{order}_category_comparison",
        category_options,
        f"Select {order} category to compare: ",
        from_dict=False,
        value=default_category,
    )

    # These settings are rendered with display "none" by default.
    hidden_selectors = [
        get_item_radio_items(
            f"{order}_uni_or_multi_comparison",
            UNIVARIATE_OR_MULTIVARIATE,
            "Select the type of XWAS :",
        ),
        get_item_radio_items(
            f"{order}_method_comparison",
            SUBSET_METHODS,
            "Select method :",
        ),
        get_item_radio_items(
            f"{order}_correlation_type_comparison",
            CORRELATION_TYPES,
            "Select correlation type :",
        ),
    ]
    hidden_settings = html.Div(
        hidden_selectors,
        id=f"{order}_hiden_settings",
        style={"display": "none"},
    )

    return dbc.Card([category_selector, hidden_settings])
def get_controls_tab_category_multivariate():
    """Build the control card of the multivariate "category" tab.

    Returns:
        A ``dbc.Card`` with the main category, subcategory, ordering,
        algorithm and correlation type selectors.
    """
    # Subcategories offered: everything under "All" minus the categories
    # listed in MULTIVARIATE_CATEGORIES_TO_REMOVE.
    subcategories = pd.Index(MAIN_CATEGORIES_TO_CATEGORIES["All"]).drop(
        MULTIVARIATE_CATEGORIES_TO_REMOVE
    )
    algorithm_options = {
        name: ALGORITHMS[name]
        for name in ("elastic_net", "light_gbm", "neural_network")
    }

    controls = [
        get_item_radio_items(
            "main_category_category_multivariate",
            list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
            "Select X main category: ",
            from_dict=False,
        ),
        get_drop_down(
            "category_category_multivariate",
            subcategories,
            "Select X subcategory: ",
            from_dict=False,
        ),
        get_item_radio_items(
            "order_type_category_multivariate",
            ORDER_TYPES,
            "Order by:",
        ),
        get_item_radio_items(
            "algorithm_category",
            algorithm_options,
            "Select an Algorithm :",
        ),
        get_item_radio_items(
            "correlation_type_category_multivariate",
            CORRELATION_TYPES,
            "Select correlation type :",
        ),
    ]
    return dbc.Card(controls)
def get_controls_tab_bar_plot_multivariate_results():
    """Build the control card of the multivariate results bar plot tab.

    Returns:
        A ``dbc.Card`` with the main category, aging dimension, ranking mode
        and algorithm selectors.
    """
    algorithm_options = {
        name: ALGORITHMS[name]
        for name in ("best_algorithm", "elastic_net", "light_gbm", "neural_network")
    }

    main_category_selector = get_item_radio_items(
        "main_category_bar_plot_multivariate_results",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    dimension_selector = get_drop_down(
        "dimension_bar_plot_multivariate_results",
        DIMENSIONS_SUBDIMENSIONS,
        "Select an aging dimension : ",
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_bar_plot_multivariate_results",
        DISPLAY_MODE,
        "Rank by : ",
    )
    algorithm_selector = get_item_radio_items(
        "algorithm_bar_plot_multivariate_results",
        algorithm_options,
        "Select an Algorithm :",
    )

    return dbc.Card(
        [
            main_category_selector,
            dimension_selector,
            display_mode_selector,
            algorithm_selector,
        ]
    )
def get_controls_tab_heatmap_multivariate_results():
    """Build the control card of the multivariate results heatmap tab.

    Returns:
        A ``dbc.Card`` with the main category and algorithm selectors, plus a
        hidden ``html.Div`` wrapping the "best algorithm" pie chart.
    """
    algorithm_options = {
        name: ALGORITHMS[name]
        for name in ("best_algorithm", "elastic_net", "light_gbm", "neural_network")
    }

    main_category_selector = get_item_radio_items(
        "main_category_heatmap_multivariate_results",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    algorithm_selector = get_item_radio_items(
        "algorithm_heatmap_multivariate_results",
        algorithm_options,
        "Select an Algorithm :",
    )

    # The pie chart container is rendered with display "none" by default.
    pie_chart = dcc.Loading(
        dcc.Graph(
            id="pie_chart_heatmap_multivariate_results",
            config=DOWNLOAD_CONFIG,
        )
    )
    pie_chart_container = html.Div(
        [html.H5("Composition of the best algorithm"), pie_chart],
        id="div_pie_chart_heatmap_multivariate_results",
        style={"display": "none"},
    )

    return dbc.Card([main_category_selector, algorithm_selector, pie_chart_container])
def get_controls_tab():
    """Build the control card of the univariate summary tab.

    Returns:
        A ``dbc.Card`` with the main category selector and the item selector.
    """
    main_category_selector = get_item_radio_items(
        "main_category_univariate_summary",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    item_selector = get_item_radio_items(
        "item_univariate_summary",
        ITEMS_LEGEND,
        "Select :",
    )
    return dbc.Card([main_category_selector, item_selector])
def get_controls_features_multivariate():
    """Build the control card of the multivariate features tab.

    Returns:
        A ``dbc.Card`` with the main category, subcategory and aging dimension
        selectors. The subcategory drop-down is created with the single
        placeholder option ``"..."``.
    """
    controls = []
    controls.append(
        get_item_radio_items(
            "main_category_features_multivariate",
            list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
            "Select X main category: ",
            from_dict=False,
        )
    )
    controls.append(
        get_drop_down(
            "category_features_multivariate",
            ["..."],
            "Select X subcategory: ",
            from_dict=False,
        )
    )
    controls.append(
        get_drop_down(
            "dimension_subdimension_features_multivariate",
            DIMENSIONS_SUBDIMENSIONS,
            "Select an aging dimension: ",
        )
    )
    return dbc.Card(controls)
def get_controls_tab_average_multivariate():
    """Build the control card of the multivariate "average" tab.

    Returns:
        A ``dbc.Card`` with the main category selector, two aging dimension
        drop-downs (the second one wrapped in a hidden ``html.Div``), the
        ranking mode, the algorithm and the correlation type selectors.
    """
    # First drop-down: the two aggregate entries, then every known dimension.
    first_dimension_options = {
        "MainDimensions": "MainDimensions",
        "SubDimensions": "SubDimensions",
    }
    first_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    # Second drop-down: "average", then every known dimension.
    second_dimension_options = {"average": "average"}
    second_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    algorithm_options = {
        name: ALGORITHMS[name]
        for name in ("elastic_net", "light_gbm", "neural_network")
    }

    main_category_selector = get_item_radio_items(
        "main_category_average_multivariate",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    first_dimension_selector = get_drop_down(
        "dimension_subdimension_1_average_multivariate",
        first_dimension_options,
        "Select an aging dimension 1: ",
    )
    # Rendered with display "none" by default.
    second_dimension_selector = html.Div(
        [
            get_drop_down(
                "dimension_subdimension_2_average_multivariate",
                second_dimension_options,
                "Select an aging dimension 2: ",
            )
        ],
        id="hiden_dimension_subdimension_2_average_multivariate",
        style={"display": "none"},
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_average_multivariate",
        DISPLAY_MODE,
        "Rank by : ",
    )
    algorithm_selector = get_item_radio_items(
        "algorithm_average_multivariate",
        algorithm_options,
        "Select an algorithm :",
    )
    correlation_type_selector = get_item_radio_items(
        "correlation_type_average_multivariate",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card(
        [
            main_category_selector,
            first_dimension_selector,
            second_dimension_selector,
            display_mode_selector,
            algorithm_selector,
            correlation_type_selector,
        ]
    )
def get_controls_tab_univariate_category():
    """Build the control card of the univariate "category" tab.

    Returns:
        A ``dbc.Card`` with the main category, subcategory, ordering, subset
        method and correlation type selectors. The subcategory drop-down is
        created with the single option ``"All"``.
    """
    main_category_selector = get_item_radio_items(
        "main_category_univariate_category",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    subcategory_selector = get_drop_down(
        "category_univariate_category",
        ["All"],
        "Select X subcategory: ",
        from_dict=False,
    )
    order_selector = get_item_radio_items(
        "order_type_univariate_category",
        ORDER_TYPES,
        "Order by:",
    )
    subset_method_selector = get_item_radio_items(
        "subset_method_univariate_category",
        SUBSET_METHODS,
        "Select subset method :",
    )
    correlation_type_selector = get_item_radio_items(
        "correlation_type_univariate_category",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card(
        [
            main_category_selector,
            subcategory_selector,
            order_selector,
            subset_method_selector,
            correlation_type_selector,
        ]
    )
def get_controls_tab_univariate_volcano():
    """Build the control card of the univariate volcano tab.

    Returns:
        A ``dbc.Card`` with the main category, subcategory and aging dimension
        selectors. The subcategory drop-down is created with the single option
        ``"All"``.
    """
    main_category_selector = get_item_radio_items(
        "main_category_univariate_volcano",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    subcategory_selector = get_drop_down(
        "category_univariate_volcano",
        ["All"],
        "Select X subcategory: ",
        from_dict=False,
    )
    dimension_selector = get_drop_down(
        "dimension_univariate_volcano",
        DIMENSIONS_SUBDIMENSIONS,
        "Select an aging dimension: ",
    )
    return dbc.Card([main_category_selector, subcategory_selector, dimension_selector])
def get_controls_tab_univariate_average():
    """Build the control card of the univariate "average" tab.

    Returns:
        A ``dbc.Card`` with the main category selector, two aging dimension
        drop-downs (the second one wrapped in a hidden ``html.Div``), the
        ranking mode, the subset method and the correlation type selectors.
    """
    # First drop-down: the two aggregate entries, then every known dimension.
    first_dimension_options = {
        "MainDimensions": "MainDimensions",
        "SubDimensions": "SubDimensions",
    }
    first_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    # Second drop-down: "average", then every known dimension.
    second_dimension_options = {"average": "average"}
    second_dimension_options.update(DIMENSIONS_SUBDIMENSIONS)

    main_category_selector = get_item_radio_items(
        "main_category_univariate_average",
        list(MAIN_CATEGORIES_TO_CATEGORIES.keys()),
        "Select X main category: ",
        from_dict=False,
    )
    first_dimension_selector = get_drop_down(
        "dimension_subdimension_1_univariate_average",
        first_dimension_options,
        "Select an aging dimension 1: ",
    )
    # Rendered with display "none" by default.
    second_dimension_selector = html.Div(
        [
            get_drop_down(
                "dimension_subdimension_2_univariate_average",
                second_dimension_options,
                "Select an aging dimension 2: ",
            )
        ],
        id="hiden_dimension_subdimension_2_univariate_average",
        style={"display": "none"},
    )
    display_mode_selector = get_item_radio_items(
        "display_mode_univariate_average",
        DISPLAY_MODE,
        "Rank by : ",
    )
    subset_method_selector = get_item_radio_items(
        "subset_method_univariate_average",
        SUBSET_METHODS,
        "Select subset method :",
    )
    correlation_type_selector = get_item_radio_items(
        "correlation_type_univariate_average",
        CORRELATION_TYPES,
        "Select correlation type :",
    )

    return dbc.Card(
        [
            main_category_selector,
            first_dimension_selector,
            second_dimension_selector,
            display_mode_selector,
            subset_method_selector,
            correlation_type_selector,
        ]
    )
def _fill_graph_tab_univariate_average(
    subset_method,
    correlation_type,
    main_category,
    dimension_subdimension_1,
    dimension_subdimension_2,
    display_mode,
    data_correlations,
    data_averages,
):
    """Build the bar plot of the univariate "average" tab.

    When ``dimension_subdimension_2`` is ``"average"`` the bars show the mean
    (with std error bars) read from ``data_averages`` for
    ``dimension_subdimension_1``; otherwise they show the correlations read
    from ``data_correlations`` for ``dimension_subdimension_2``.

    Args:
        subset_method: First level of the column selection.
        correlation_type: Second level of the column selection.
        main_category: Key of MAIN_CATEGORIES_TO_CATEGORIES to display.
        dimension_subdimension_1: Dimension looked up in the averages.
        dimension_subdimension_2: ``"average"`` or the squeezed dimension
            looked up in the correlations.
        display_mode: Ranking mode of the bars.
        data_correlations: Data passed to ``pd.DataFrame`` holding
            "dimension", "subdimension" and "category" columns.
        data_averages: Data passed to ``pd.DataFrame`` holding "dimension"
            and "category" columns.

    Returns:
        A ``(figure, title)`` tuple.
    """
    import plotly.graph_objs as go

    # Categories appended after the per-main-category ranking.
    trailing_categories = ["FamilyHistory", "Genetics", "Phenotypic"]

    if main_category == "All":
        all_main_categories = [
            f"All_{main_cat}" for main_cat in MAIN_CATEGORIES_TO_CATEGORIES.keys()
        ]
    else:
        all_main_categories = [f"All_{main_category}"]
    selected_categories = MAIN_CATEGORIES_TO_CATEGORIES[main_category] + all_main_categories

    if dimension_subdimension_2 == "average":
        averages = pd.DataFrame(data_averages).set_index(["dimension", "category"])
        # NOTE(review): column labels are stringified tuples rebuilt with eval;
        # this is only safe if data_averages comes from a trusted source.
        column_tuples = list(map(eval, averages.columns.tolist()))
        averages.columns = pd.MultiIndex.from_tuples(
            column_tuples,
            names=["subset_method", "correlation_type", "observation"],
        )

        row_selection = (dimension_subdimension_1, selected_categories)
        column_selection = (subset_method, correlation_type)
        sorted_averages = averages.loc[row_selection, column_selection].sort_values(
            by=["mean"], ascending=False
        )

        # NOTE(review): this branch tests "view_all" while the correlations
        # branch tests "view_decreasing" - confirm against DISPLAY_MODE.
        if display_mode == "view_all":
            bars = go.Bar(
                x=sorted_averages.index.get_level_values("category"),
                y=sorted_averages["mean"],
                error_y={"array": sorted_averages["std"], "type": "data"},
                name="Average correlations",
                marker_color="indianred",
            )
        else:  # display_mode == view_per_main_category then main_category = All
            list_main_category = []
            list_categories = []
            # Get the ranking of subcategories per main category
            for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
                if main_category_group != "All":
                    group_categories = MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] + [
                        f"All_{main_category_group}"
                    ]
                    group_averages = sorted_averages.swaplevel().loc[group_categories]
                    ranked_group = group_averages.sort_values(by=["mean"], ascending=False)
                    ranked_categories = ranked_group.index.get_level_values("category")

                    list_categories.extend(ranked_categories)
                    list_main_category.extend(
                        [main_category_group] * len(ranked_categories)
                    )

            ordered_categories = list_categories + trailing_categories
            means_by_category = sorted_averages["mean"].swaplevel()
            stds_by_category = sorted_averages["std"].swaplevel()
            bars = go.Bar(
                x=[list_main_category + ["", "", ""], ordered_categories],
                y=means_by_category[ordered_categories],
                error_y={"array": stds_by_category[ordered_categories], "type": "data"},
                name="Correlations",
                marker_color="indianred",
            )

        title = f"Average average correlation across aging dimensions and X categories = {sorted_averages['mean'].mean().round(3)} +- {sorted_averages['mean'].std().round(3)}"
        y_label = "Average correlation"
    else:
        correlations_raw = pd.DataFrame(data_correlations).set_index(
            ["dimension", "subdimension", "category"]
        )
        # NOTE(review): same eval-based column rebuild as for the averages;
        # only safe on trusted data.
        column_tuples = list(map(eval, correlations_raw.columns.tolist()))
        correlations_raw.columns = pd.MultiIndex.from_tuples(
            column_tuples, names=["subset_method", "correlation_type"]
        )
        correlations_raw = correlations_raw.reset_index()

        # A subdimension equal to "*" is replaced by an empty string, so the
        # squeezed name is then the dimension alone.
        squeezed_subdimension = correlations_raw["subdimension"].replace("*", "")
        correlations_raw["squeezed_dimension"] = (
            correlations_raw["dimension"] + squeezed_subdimension
        )
        correlations_raw = correlations_raw.drop(columns=["dimension", "subdimension"])
        correlations_raw = correlations_raw.set_index(["squeezed_dimension", "category"])

        row_selection = (dimension_subdimension_2, selected_categories)
        column_selection = (subset_method, correlation_type)
        sorted_correlations = correlations_raw.loc[
            row_selection, column_selection
        ].sort_values(ascending=False)

        if display_mode == "view_decreasing":
            bars = go.Bar(
                x=sorted_correlations.index.get_level_values("category"),
                y=sorted_correlations,
                name="Correlations",
                marker_color="indianred",
            )
        else:  # display_mode == view_per_main_category
            list_main_category = []
            list_categories = []
            # Get the ranking of subcategories per main category
            for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
                if main_category_group != "All":
                    group_categories = MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] + [
                        f"All_{main_category_group}"
                    ]
                    group_correlations = sorted_correlations.swaplevel().loc[group_categories]
                    ranked_group = group_correlations.sort_values(ascending=False)
                    ranked_categories = ranked_group.index.get_level_values("category")

                    list_categories.extend(ranked_categories)
                    list_main_category.extend(
                        [main_category_group] * len(ranked_categories)
                    )

            ordered_categories = list_categories + trailing_categories
            bars = go.Bar(
                x=[list_main_category + ["", "", ""], ordered_categories],
                y=sorted_correlations.swaplevel()[ordered_categories],
                name="Correlations",
                marker_color="indianred",
            )

        title = f"Average correlation = {sorted_correlations.mean().round(3)} +- {sorted_correlations.std().round(3)}"
        y_label = "Correlation"

    layout = {
        "width": 2000,
        "height": 800,
        "xaxis": {
            "title": "X subcategory",
            "tickangle": 90,
            "showgrid": False,
            "title_font": {"size": 25},
        },
        "yaxis": {"title": y_label, "title_font": {"size": 25}},
        "margin": {"l": 0, "r": 0, "b": 280, "t": 0},
    }
    fig = go.Figure(bars)
    fig.update_layout(layout)

    return fig, title
"MusculoskeletalKnees", "MusculoskeletalScalars", "MusculoskeletalSpine", ], "MusculoskeletalFullBody": ["Musculoskeletal"], "MusculoskeletalHips": ["Musculoskeletal"], "MusculoskeletalKnees": ["Musculoskeletal"], "MusculoskeletalScalars": ["Musculoskeletal"], "MusculoskeletalSpine": ["Musculoskeletal"], "PhysicalActivity": [], } FULL_CATEGORY = (MAIN_CATEGORIES_TO_CATEGORIES["All"] + ["Phenotypic", "Genetics"] + [ f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys() ]) if __name__ == "__main__": correlations_raw = load_feather( f"xwas/univariate_correlations/correlations.feather").set_index([ "dimension_1", "subdimension_1", "dimension_2", "subdimension_2", "category" ]) correlations_raw.columns = pd.MultiIndex.from_tuples( list(map(eval, correlations_raw.columns.tolist())), names=["subset_method", "correlation_type"]) correlations_raw.reset_index(inplace=True) for index_dimension in [1, 2]: correlations_raw[ f"squeezed_dimension_{index_dimension}"] = correlations_raw[
def get_graph_average(dimension_1, data_averages):
    """Build the per-main-category bar plot of average correlations.

    The averages are read for the ("union", "pearson") columns. The mean and
    std of each main category, and the final title, are printed to stdout.

    Args:
        dimension_1: Dimension looked up in the first index level.
        data_averages: Data passed to ``pd.DataFrame`` holding "dimension"
            and "category" columns.

    Returns:
        The plotly figure.
    """
    import plotly.graph_objs as go

    # Categories appended after the per-main-category ranking.
    trailing_categories = ["FamilyHistory", "Genetics", "Phenotypic"]

    averages = pd.DataFrame(data_averages).set_index(["dimension", "category"])
    # NOTE(review): column labels are stringified tuples rebuilt with eval;
    # this is only safe if data_averages comes from a trusted source.
    column_tuples = list(map(eval, averages.columns.tolist()))
    averages.columns = pd.MultiIndex.from_tuples(
        column_tuples,
        names=["subset_method", "correlation_type", "observation"],
    )

    selected_categories = MAIN_CATEGORIES_TO_CATEGORIES["All"] + [
        f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
    ]
    sorted_averages = averages.loc[
        (dimension_1, selected_categories), ("union", "pearson")
    ].sort_values(by=["mean"], ascending=False)

    list_main_category = []
    list_categories = []
    # Get the ranking of subcategories per main category
    for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
        if main_category_group != "All":
            group_categories = MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] + [
                f"All_{main_category_group}"
            ]
            ranked_group = (
                sorted_averages.swaplevel()
                .loc[group_categories]
                .sort_values(by=["mean"], ascending=False)
            )

            group_means = ranked_group["mean"]
            print(
                "main_category",
                main_category_group,
                group_means.mean().round(3),
                "+-",
                group_means.std().round(3),
            )

            ranked_categories = ranked_group.index.get_level_values("category")
            list_categories.extend(ranked_categories)
            list_main_category.extend([main_category_group] * len(ranked_categories))

    ordered_categories = list_categories + trailing_categories
    bars = go.Bar(
        x=[list_main_category + ["", "", ""], ordered_categories],
        y=sorted_averages["mean"].swaplevel()[ordered_categories],
        error_y={
            "array": sorted_averages["std"].swaplevel()[ordered_categories],
            "type": "data",
        },
        name="Correlations",
        marker_color="indianred",
    )

    title = f"Average average correlation across aging dimensions and X categories = {sorted_averages['mean'].mean().round(3)} +- {sorted_averages['mean'].std().round(3)}"
    y_label = "Average correlation"

    layout = {
        "width": 2000,
        "height": 700,
        "xaxis": {
            "title": "X subcategory",
            "tickangle": 90,
            "showgrid": False,
            "title_font": {"size": 25},
        },
        "yaxis": {"title": y_label, "title_font": {"size": 25}},
        "margin": {"l": 0, "r": 0, "b": 0, "t": 0},
    }
    fig = go.Figure(bars)
    fig.update_layout(layout)

    print(title)

    return fig
def _fill_graph_tab_bar_plot(main_category, dimension, algorithm, display_mode, data_scores):
    """Build the R² bar plot of the multivariate results tab.

    Args:
        main_category: Key of MAIN_CATEGORIES_TO_CATEGORIES to display.
        dimension: Aging dimension whose scores are plotted.
        algorithm: Algorithm name, or ``"best_algorithm"`` to keep, for every
            (category, dimension) pair, the row with the highest "r2".
        display_mode: ``"view_decreasing"`` ranks every bar by decreasing R²;
            any other value groups the bars per main category.
        data_scores: Data passed to ``pd.DataFrame`` holding "category",
            "dimension", "algorithm", "r2" and "std" columns.

    Returns:
        A ``(figure, title)`` tuple; the title reports the mean and std of
        the displayed R² values.
    """
    import plotly.graph_objs as go

    if algorithm == "best_algorithm":
        # Keep, for each (category, dimension), the row with the highest r2.
        every_score_every_dimension = (
            pd.DataFrame(data_scores)
            .groupby(by=["category", "dimension"])
            .apply(
                lambda score_category_dimension: score_category_dimension.iloc[
                    score_category_dimension["r2"].argmax()
                ]
            )
            .reset_index(drop=True)
        )
        every_score = (
            every_score_every_dimension.set_index("dimension").loc[dimension].set_index("category")
        )
    else:
        every_score = (
            pd.DataFrame(data_scores)
            .set_index(["algorithm", "dimension"])
            .loc[(algorithm, dimension)]
            .reset_index()
            .set_index("category")
        )

    # Categories of the main category, minus the ones excluded from the
    # multivariate analysis.
    multivariate_categories = [
        category
        for category in MAIN_CATEGORIES_TO_CATEGORIES[main_category]
        if category not in MULTIVARIATE_CATEGORIES_TO_REMOVE
    ]

    scores = every_score.loc[multivariate_categories].sort_values(by=["r2"], ascending=False)

    hovertemplate = "X subcategory: %{x} <br>R²: %{y:.3f} +- %{customdata[0]:.3f} <br><extra>%{customdata[1]}</extra>"

    if display_mode == "view_decreasing":
        bars = go.Bar(
            x=scores.index,
            y=scores["r2"],
            error_y={"array": scores["std"], "type": "data"},
            name=SCORES["r2"],
            marker_color="indianred",
            hovertemplate=hovertemplate,
            customdata=scores[["std", "algorithm"]],
        )
    else:  # display_mode == view_per_main_category
        list_main_category = []
        list_categories = []
        # Get the ranking of subcategories per main category
        for main_category_group in MAIN_CATEGORIES_TO_CATEGORIES.keys():
            if main_category_group == "All":
                continue
            group_categories = MAIN_CATEGORIES_TO_CATEGORIES[main_category_group] + [
                f"All_{main_category_group}"
            ]
            sorted_index_categories = (
                scores.loc[scores.index.isin(group_categories)]
                .sort_values(by=["r2"], ascending=False)
                .index
            )

            list_categories.extend(sorted_index_categories)
            list_main_category.extend([main_category_group] * len(sorted_index_categories))

        if main_category == "All":
            list_categories += ["FamilyHistory"]
            list_main_category += [""]

        bars = go.Bar(
            x=[list_main_category, list_categories],
            y=scores.loc[list_categories, "r2"],
            # The error bars must follow the same order as x and y; using
            # scores["std"] directly would keep the decreasing-R² order and
            # attach each error bar to the wrong category.
            error_y={"array": scores.loc[list_categories, "std"], "type": "data"},
            name=SCORES["r2"],
            marker_color="indianred",
            hovertemplate=hovertemplate,
            customdata=scores.loc[list_categories, ["std", "algorithm"]],
        )

    fig = go.Figure(bars)
    fig.update_layout(
        {
            "height": 800,
            "xaxis": {
                "title": "X subcategory",
                "tickangle": 90,
                "showgrid": False,
                "title_font": {"size": 25},
            },
            "yaxis": {"title": SCORES["r2"], "title_font": {"size": 25}},
            "margin": {"l": 0, "r": 0, "b": 0, "t": 0},
        }
    )

    return fig, f"Average {SCORES['r2']} = {scores['r2'].mean().round(3)} +- {scores['r2'].std().round(3)}"
def _fill_heatmap_univariate_summary(item, main_category, data):
    """Build the heatmap of the univariate summary tab.

    The colour of each cell is the percentage stored under
    ``(item, "percentage")``; the matching ``(item, "number")`` value is
    attached as custom data for the hover text.

    Args:
        item: Key of the first column level; also used to index ITEMS_LEGEND,
            ITEMS_COLORSCALE and ITEMS_TITLES.
        main_category: Key of MAIN_CATEGORIES_TO_CATEGORIES to display.
        data: Data passed to ``pd.DataFrame`` holding "dimension" and
            "category" columns.

    Returns:
        A ``(figure, title)`` tuple.
    """
    import plotly.graph_objects as go

    if main_category == "All":
        # Every "All_<main category>" aggregate, then each individual
        # category except "Genetics" and "Phenotypic".
        list_categories = [
            f"All_{one_main_category}" for one_main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys()
        ] + list(pd.Index(MAIN_CATEGORIES_TO_CATEGORIES[main_category]).drop(["Genetics", "Phenotypic"]))
    else:
        list_categories = [f"All_{main_category}"] + MAIN_CATEGORIES_TO_CATEGORIES[main_category]

    summary = pd.DataFrame(data).set_index(["dimension", "category"])
    # NOTE(review): column labels are stringified tuples rebuilt with eval;
    # this is only safe if data comes from a trusted source.
    summary.columns = pd.MultiIndex.from_tuples(
        list(map(eval, summary.columns.tolist())), names=["item", "observation"]
    )

    summary_item_percentage = (
        100
        * summary.reset_index().pivot(
            index=[("dimension", "")], columns=[("category", "")], values=(item, "percentage")
        )
    ).astype(int)
    summary_item_percentage_category = summary_item_percentage[list_categories]
    summary_item_percentage_category.index.name = "dimension"
    summary_item_percentage_category.columns.name = "category"

    summary_item_number = summary.reset_index().pivot(
        index=[("dimension", "")], columns=[("category", "")], values=(item, "number")
    )
    summary_item_number_category = summary_item_number[list_categories]
    # Name the axes of the number frame; the percentage frame was named above.
    summary_item_number_category.index.name = "dimension"
    summary_item_number_category.columns.name = "category"

    hovertemplate = "<br>".join(
        [
            "X main category: %{x}",
            "Aging dimension: %{y}",
            f"{ITEMS_LEGEND[item]}: " + "%{customdata} ~ %{z} % of the variables",
            "<extra></extra>",
        ]
    )

    heatmap = go.Heatmap(
        z=summary_item_percentage_category,
        x=summary_item_percentage_category.columns,
        y=summary_item_percentage_category.index,
        customdata=summary_item_number_category,
        hovertemplate=hovertemplate,
        colorscale=ITEMS_COLORSCALE[item],
        zmin=0,
        zmax=100,
    )

    fig = go.Figure(heatmap)
    fig.update_layout(
        {
            "xaxis": {"title": "X subcategory", "tickangle": 90},
            "yaxis": {"title": "Aging dimension"},
            # 30 px per cell, with a 500 px minimum width.
            "width": max(30 * summary_item_percentage_category.shape[1], 500),
            "height": 30 * summary_item_percentage_category.shape[0],
            "xaxis_title_font": {"size": 25},
            "yaxis_title_font": {"size": 25},
            "margin": {"l": 0, "r": 0, "b": 0, "t": 0},
        }
    )

    return fig, ITEMS_TITLES[item]
) correlations_cleaned_dimensions = correlations_cleaned_dimensions_1.rename( index=DICT_TO_CHANGE_DIMENSIONS, level="dimension_2" ) correlations_cleaned = correlations_cleaned_dimensions.rename(index=DICT_TO_CHANGE_CATEGORIES, level="category") correlations_cleaned.reset_index().to_feather("data/xwas/univariate_correlations/correlations/correlations.feather") for dimension in DIMENSIONS: if dimension in DICT_TO_CHANGE_DIMENSIONS.keys(): dimension = DICT_TO_CHANGE_DIMENSIONS[dimension] correlations_cleaned.loc[dimension].reset_index().rename(columns={"dimension_2": "dimension"}).to_feather( f"data/xwas/univariate_correlations/correlations/dimensions/correlations_{dimension}.feather" ) for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [ f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys() ]: if category in DICT_TO_CHANGE_CATEGORIES.keys(): category = DICT_TO_CHANGE_CATEGORIES[category] correlations_cleaned.swaplevel().swaplevel(i=0, j=1).loc[category].reset_index().to_feather( f"data/xwas/univariate_correlations/correlations/categories/correlations_{category}.feather" ) averages_correlations = load_feather("xwas/univariate_correlations/averages_correlations.feather") averages_correlations_cleaned_dimensions = averages_correlations.set_index(["dimension", "category"]).rename( index=DICT_TO_CHANGE_DIMENSIONS, level="dimension" ) averages_correlations_cleaned = averages_correlations_cleaned_dimensions.rename( index=DICT_TO_CHANGE_CATEGORIES, level="category" )
import pandas as pd from tqdm import tqdm from dash_website.utils.aws_loader import load_feather from dash_website import DIMENSIONS, MAIN_CATEGORIES_TO_CATEGORIES if __name__ == "__main__": list_indexes = [] for dimension in DIMENSIONS + ["All_aging_dimensions"]: for category in MAIN_CATEGORIES_TO_CATEGORIES["All"] + [ f"All_{main_category}" for main_category in MAIN_CATEGORIES_TO_CATEGORIES.keys() ]: list_indexes.append([dimension, category]) indexes = pd.MultiIndex.from_tuples(list_indexes, names=["dimension", "category"]) list_columns = [] for item in ["total", "significant", "accelerated_aging", "decelerated_aging"]: if item == "total": observations = ["total"] else: observations = ["number", "percentage"] for observation in observations: list_columns.append([item, observation]) columns = pd.MultiIndex.from_tuples(list_columns, names=["item", "observation"]) summary = pd.DataFrame(None, index=indexes, columns=columns) for dimension in tqdm(DIMENSIONS): correlations_dimension = load_feather( f"xwas/univariate_results/linear_correlations_{dimension}.feather",