def visualize_age_relations(dbutils):
    """Show side-by-side box plots of age against positive/negative values.

    Loads the "age_relations" dataset through ``load_dataset`` (the first
    command-line argument and ``dbutils`` are forwarded unchanged) and
    renders a 1x2 subplot figure: the "positive" column on the left and
    the "negative" column on the right, each plotted against "age".

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    frame = load_dataset(sys.argv[1], "age_relations", dbutils)
    positive_values = frame["positive"]
    negative_values = frame["negative"]
    age_values = frame["age"]

    panel_titles = (
        "Positive test/age coefficient correlation",
        "Negative test/age coefficient correlation",
    )
    figure = make_subplots(rows=1, cols=2, subplot_titles=panel_titles)

    positive_box = go.Box(
        x=positive_values,
        y=age_values,
        name="Positive",
        marker_color=color_scheme.color_300,
    )
    figure.add_trace(positive_box, row=1, col=1)

    negative_box = go.Box(
        x=negative_values,
        y=age_values,
        name="Negative",
        marker_color=color_scheme.color_700,
    )
    figure.add_trace(negative_box, row=1, col=2)

    # Draw every individual sample next to its box.
    figure.update_traces(boxpoints='all')
    figure.update_layout(
        title_text="Subplots of age in relation a positive/negative test result"
    )
    figure.show()
    return None
def visualize_confirmed_cases_europe(dbutils):
    """Show a choropleth map and a top-10 bar chart of European confirmed cases.

    Loads the "confirmed_cases_europe" dataset through ``load_dataset``
    and displays two figures in sequence: a Europe-scoped choropleth
    coloured by "sum(confirmed)", then a horizontal bar chart of the ten
    rows with the highest "sum(confirmed)".

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. Both figures are displayed as a side effect.
    """
    cases = load_dataset(sys.argv[1], "confirmed_cases_europe", dbutils)

    europe_map = px.choropleth(
        cases,
        locations="country",
        locationmode='country names',
        color="sum(confirmed)",
        hover_name="country",
        range_color=[1, 1000000],
        color_continuous_scale='portland',
        title='European countries with confirmed cases',
        scope='europe',
        height=800,
    )
    europe_map.show()

    # Take the ten largest rows, then reverse them so the largest one is
    # the last row handed to the horizontal bar chart.
    ranked = cases.sort_values('sum(confirmed)', ascending=False)
    top_ten = ranked[:10]
    top_ten_reversed = top_ten[::-1]

    top_ten_bars = px.bar(
        top_ten_reversed,
        x='sum(confirmed)',
        y='country',
        color_discrete_sequence=[color_scheme.color_400],
        title='Confirmed cases in Europe (top-10 countries)',
        text='sum(confirmed)',
        orientation='h',
    )
    top_ten_bars.show()
    return None
def visualize_confirmed_cases_countries(dbutils):
    """Plot confirmed-case curves for Serbia, China, Italy and Norway.

    Loads the four per-country datasets through ``load_dataset`` and
    overlays one line per country ("date" vs. "sum(confirmed)") on a
    single figure.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    dataset_names = (
        "confirmed_cases_serbia",
        "confirmed_cases_china",
        "confirmed_cases_italy",
        "confirmed_cases_norway",
    )
    # All datasets are loaded before the figure is created.
    country_frames = [
        load_dataset(sys.argv[1], dataset_name, dbutils)
        for dataset_name in dataset_names
    ]

    figure = go.Figure()
    legend_entries = (
        ("Serbia", color_scheme.color_500),
        ("China", color_scheme.color_700),
        ("Italy", color_scheme.color_900),
        ("Norway", color_scheme.color_300),
    )
    for country_frame, (label, line_colour) in zip(country_frames, legend_entries):
        curve = go.Scatter(
            x=country_frame['date'],
            y=country_frame['sum(confirmed)'],
            name=label,
            line_color=line_colour,
            opacity=0.8,
        )
        figure.add_trace(curve)

    figure.update_layout(
        title_text="Overview of case growth in Serbia, China, Italy and Norway"
    )
    figure.show()
    return None
def visualize_red_blood_cells_values(dbutils):
    """Show a histogram of the "Red blood Cells" column with a rug margin.

    Loads the "red_blood_cells_values" dataset through ``load_dataset``.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    measurements = load_dataset(sys.argv[1], "red_blood_cells_values", dbutils)
    histogram_options = dict(
        x="Red blood Cells",
        title="Red blood Cells distribution",
        color_discrete_sequence=[color_scheme.color_300],
        opacity=0.8,
        marginal="rug",
    )
    histogram = px.histogram(measurements, **histogram_options)
    histogram.show()
    return None
def visualize_aggregate_age_result(dbutils):
    """Show a line chart of "avg(age)" per "result" on a logarithmic y-axis.

    Loads the "aggregate_age_result" dataset through ``load_dataset``.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    aggregated = load_dataset(sys.argv[1], "aggregate_age_result", dbutils)
    line_colours = [color_scheme.color_400]
    chart = px.line(
        aggregated,
        x="result",
        y="avg(age)",
        title="Average age/result distribution",
        log_y=True,
        color_discrete_sequence=line_colours,
    )
    chart.show()
    return None
def visualize_hemoglobin_values(dbutils):
    """Show a histogram of the "Hemoglobin" column with a rug margin.

    Loads the "hemoglobin_values" dataset through ``load_dataset``.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    measurements = load_dataset(sys.argv[1], "hemoglobin_values", dbutils)
    histogram_options = dict(
        x="Hemoglobin",
        title="Hemoglobin distribution",
        color_discrete_sequence=[color_scheme.color_500],
        opacity=0.8,
        marginal="rug",
    )
    histogram = px.histogram(measurements, **histogram_options)
    histogram.show()
    return None
def visualize_confirmed_cases_recovery_rates(dbutils):
    """Show a horizontal bar chart of "recoveryRate" per "country".

    Loads the "confirmed_cases_recovery_rates" dataset through
    ``load_dataset`` and plots it as-is; no sorting or filtering is
    applied here.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    recovery_rates = load_dataset(
        sys.argv[1], "confirmed_cases_recovery_rates", dbutils
    )
    bar_colours = [color_scheme.color_500]
    chart = px.bar(
        recovery_rates,
        x='recoveryRate',
        y='country',
        title='Recoveries per 100 confirmed cases (top-10)',
        text='recoveryRate',
        height=800,
        orientation='h',
        color_discrete_sequence=bar_colours,
    )
    chart.show()
    return None
def visualize_predictions_test_result_distribution(dbutils):
    """Show a pie chart of "count" grouped by "result".

    Loads the "predictions_test_result_distribution" dataset through
    ``load_dataset``.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    distribution = load_dataset(
        sys.argv[1], "predictions_test_result_distribution", dbutils
    )
    slice_colours = [color_scheme.color_100, color_scheme.color_400]
    pie_chart = px.pie(
        distribution,
        values='count',
        names='result',
        title="Statistics of test result distribution",
        color_discrete_sequence=slice_colours,
    )
    pie_chart.show()
    return None
def visualize_predictions_missing_values(dbutils):
    """Show a bar chart of per-column missing-value counts, largest first.

    Loads the "predictions_missing_values" dataset through
    ``load_dataset``, relabels index 0 as "count", transposes the frame
    and sorts it by "count" in descending order before plotting.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    raw_counts = load_dataset(sys.argv[1], "predictions_missing_values", dbutils)

    # Relabel the row at index 0, then transpose so "count" becomes a
    # column that can be sorted and plotted.
    labelled = raw_counts.rename(index={0: 'count'})
    per_column = labelled.T
    ordered = per_column.sort_values("count", ascending=False)

    bar_colours = [color_scheme.color_400, color_scheme.color_500]
    chart = px.bar(
        ordered,
        y="count",
        color_discrete_sequence=bar_colours,
        title="Statistics of missing (null/nan) values across columns",
    )
    chart.show()
    return None
def visualize_predictions(dbutils):
    """Show a bar chart comparing the accuracy of four classifiers.

    Loads the "predictions" dataset through ``load_dataset`` and plots
    its "value" column against a fixed list of classifier labels.
    NOTE(review): the labels are hard-coded, so the rows of "value" are
    presumably expected in this same order -- confirm against the
    producer of the dataset.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    scores = load_dataset(sys.argv[1], "predictions", dbutils)

    classifier_labels = [
        'Random Forest classifier Accuracy',
        'Decision Tree Accuracy',
        'Logistic Regression Accuracy',
        'Gradient-boosted Trees Accuracy',
    ]
    accuracy_bars = go.Bar(y=scores['value'], x=classifier_labels)
    figure = go.Figure(data=[accuracy_bars])

    figure.update_traces(
        marker_color=color_scheme.color_200,
        marker_line_color=color_scheme.color_600,
        marker_line_width=1.5,
        opacity=0.6,
    )
    figure.update_layout(title_text='Comparison of classifier accuracy reports')
    figure.show()
    return None
def visualize_confirmed_cases_and_deaths_globally(dbutils):
    """Show two log-scale line charts: confirmed cases and deaths over time.

    Loads the "confirmed_cases_and_deaths_globally" dataset through
    ``load_dataset`` and displays one figure for "sum(confirmed)"
    followed by one for "sum(deaths)", both plotted against "date".

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. Both figures are displayed as a side effect.
    """
    timeline = load_dataset(
        sys.argv[1], "confirmed_cases_and_deaths_globally", dbutils
    )

    confirmed_chart = px.line(
        timeline,
        x="date",
        y="sum(confirmed)",
        title="Confirmed cases over time (logarithmic)",
        log_y=True,
        color_discrete_sequence=[color_scheme.color_400],
    )
    confirmed_chart.show()

    deaths_chart = px.line(
        timeline,
        x="date",
        y="sum(deaths)",
        title="Death cases over time (logarithmic)",
        log_y=True,
        color_discrete_sequence=[color_scheme.color_900],
    )
    deaths_chart.show()
    return None
def visualize_confirmed_cases_comparison(dbutils):
    """Show a stacked area chart of recovered, death and active counts.

    Loads the "confirmed_cases_comparison" dataset through
    ``load_dataset``, melts the three "sum(...)" columns into long form
    ("case" / "count") keyed by "date", and plots one area per case type.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    wide_frame = load_dataset(sys.argv[1], "confirmed_cases_comparison", dbutils)

    case_columns = ['sum(recovered)', 'sum(deaths)', 'sum(active)']
    long_frame = wide_frame.melt(
        id_vars="date",
        value_vars=case_columns,
        var_name='case',
        value_name='count',
    )

    area_colours = [
        color_scheme.color_200,
        color_scheme.color_400,
        color_scheme.color_800,
    ]
    chart = px.area(
        long_frame,
        x="date",
        y="count",
        color='case',
        title='Cases over time',
        color_discrete_sequence=area_colours,
    )
    chart.show()
    return None
def visualize_abstracts_words(dbutils):
    """Show a sentiment distribution plot and a word cloud of paper abstracts.

    Loads the "paper_abstracts" dataset through ``load_dataset`` and
    displays two figures in sequence: a distribution plot of the
    "sentiment_abstract" column, then a word cloud (at most 25 words)
    built from the "clean_abstract" column.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. Both figures are displayed as a side effect.
    """
    dataframe_pd = load_dataset(sys.argv[1], "paper_abstracts", dbutils)

    # Build the corpus by joining every abstract. Calling str() on the
    # underlying NumPy array yields its printed form, which NumPy
    # abbreviates with "..." once the array exceeds the print threshold
    # (1000 elements by default), so most abstracts would be dropped.
    # Missing values are skipped so the token "nan" does not pollute the
    # cloud.
    abstracts = dataframe_pd["clean_abstract"].dropna().astype(str)
    text = " ".join(abstracts)
    stopwords = set(STOPWORDS)

    fig = ff.create_distplot([dataframe_pd["sentiment_abstract"]],
                             ["sentiment_abstract"],
                             colors=[color_scheme.color_400])
    fig.show()

    word_cloud = WordCloud(width=1000,
                           height=500,
                           stopwords=stopwords,
                           background_color="white",
                           max_words=25).generate(text)

    fig = px.imshow(word_cloud.recolor(color_func=generate_custom_color,
                                       random_state=3),
                    binary_compression_level=1,
                    title="Most commonly used words in abstracts")
    fig.show()
    return None
def visualize_care_relations(dbutils):
    """Show bar charts of "result" against regular-ward and ICU admission.

    Loads the "care_relations" dataset through ``load_dataset`` and
    displays two figures in sequence: one for the regular-ward admission
    column, then one for the intensive-care admission column.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. Both figures are displayed as a side effect.
    """
    care_frame = load_dataset(sys.argv[1], "care_relations", dbutils)

    # Column names are reproduced exactly as they appear in the dataset.
    regular_ward_column = "Patient addmited to regular ward (1=yes, 0=no)"
    intensive_care_column = "Patient addmited to intensive care unit (1=yes, 0=no)"

    regular_chart = px.bar(
        care_frame,
        y="result",
        x=regular_ward_column,
        color_discrete_sequence=[color_scheme.color_400, color_scheme.color_500],
        title="Positive patients admitted to regular care",
    )
    regular_chart.show()

    intensive_chart = px.bar(
        care_frame,
        y="result",
        x=intensive_care_column,
        color_discrete_sequence=[color_scheme.color_900, color_scheme.color_500],
        title="Positive patients admitted to intensive care",
    )
    intensive_chart.show()
    return None
def visualize_predictions_value_distribution(dbutils):
    """Show a 3x3 grid of scatter plots of blood values against exam result.

    Loads the "predictions_value_distribution" dataset through
    ``load_dataset``. Each panel plots one blood-value column on the
    y-axis against the "SARS-Cov-2 exam result" column on the x-axis,
    filling the grid row by row.

    Args:
        dbutils: Handle passed straight through to ``load_dataset``.

    Returns:
        None. The figure is displayed as a side effect.
    """
    values_frame = load_dataset(
        sys.argv[1], "predictions_value_distribution", dbutils
    )

    # One (column, marker colour) pair per panel, in row-major grid order.
    panels = (
        ("Hemoglobin", color_scheme.color_900),
        ("Platelets", color_scheme.color_800),
        ("Eosinophils", color_scheme.color_700),
        ("Red blood Cells", color_scheme.color_600),
        ("Lymphocytes", color_scheme.color_500),
        ("Leukocytes", color_scheme.color_400),
        ("Basophils", color_scheme.color_300),
        ("Monocytes", color_scheme.color_200),
        ("Hematocrit", color_scheme.color_100),
    )
    grid_width = 3

    figure = make_subplots(
        rows=3,
        cols=grid_width,
        subplot_titles=tuple(f"{column}/Exam Result" for column, _ in panels),
    )

    for position, (column, marker_colour) in enumerate(panels):
        # Convert the flat position into 1-based (row, col) grid coordinates.
        row_offset, col_offset = divmod(position, grid_width)
        scatter = go.Scatter(
            x=values_frame['SARS-Cov-2 exam result'],
            y=values_frame[column],
            mode='markers',
            marker=dict(color=marker_colour),
        )
        figure.add_trace(scatter, row=row_offset + 1, col=col_offset + 1)

    figure.show()
    return None