def scatter_g19_score(fc):
    """Build a per-sample scatter plot of the covid score for one flowcell.

    Loads ``c19_read_counts.hdr.tsv`` for ``fc`` through
    ``sf.load_covid_data`` and plots ``Covid Score`` against
    ``run_sample_id`` on a log y-axis, coloured by ``replicate_flags``.

    Rows whose ``run_sample_id`` starts with ``G`` or ``H`` (but not ``Ht``)
    are plotted, sorted by descending score, when at least one exists;
    otherwise every loaded row is plotted.

    Args:
        fc: Flowcell identifier handed to ``sf.load_covid_data``, or ``None``.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when ``fc``
        is ``None``.
    """
    if fc is None:
        return {}

    counts = sf.load_covid_data([fc], 'c19_read_counts.hdr.tsv')
    # The small offset keeps zero ratios plottable on the log axis.
    counts['Covid Score'] = counts['covid_ratio'] + 0.000001
    # NOTE(review): this adds `covid_ratio` to the count columns -- confirm
    # that a count column was not intended here.
    counts['total reads'] = (counts['covid_ratio'] + counts['rnase_count'] +
                             counts['spikein_count'] + counts['unknown'])
    counts['replicate_flags'] = counts['replicate_flags'].replace(
        np.nan, "not available")

    sample_ids = counts.run_sample_id.str
    keep = (sample_ids.startswith(('G', 'H'), na=False)
            & ~sample_ids.startswith('Ht', na=False))
    g_samples = counts[keep].sort_values('Covid Score', ascending=False)

    if len(g_samples) > 0:
        plot_df = g_samples
        title = ("Individual clinical samples for this FC and the flags "
                 "for each replicate")
    else:
        plot_df = counts
        title = "All Samples for this FC and the flags for each replicate"

    fig = sf.fix_plot(
        px.scatter(plot_df,
                   color='replicate_flags',
                   x='run_sample_id',
                   y='Covid Score'))
    fig.update_layout(yaxis_type="log", title_text=title)
    # Some sample ids are long integers; force a categorical x-axis so they
    # are not laid out on a numeric scale.
    fig.update_xaxes(type='category')
    return fig
def pos_g19ratio(fc_click, my_type, start_date, end_date):
    """Plot ``median_covid_ratio`` over time for one control sample type.

    The run ids from ``sf.fc_list()`` are restricted to those whose date lies
    strictly between ``start_date`` and ``end_date``; ``c19_call.hdr.tsv`` is
    then loaded for just those runs and filtered to ``sample_type == my_type``.

    Args:
        fc_click: Click counter/flag; nothing is plotted while it is falsy.
        my_type: Value matched against the ``sample_type`` column and used as
            the title prefix.
        start_date: Exclusive lower date bound (anything ``pd.to_datetime``
            accepts).
        end_date: Exclusive upper date bound.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when
        ``fc_click`` is falsy.
    """
    if not fc_click:
        return {}

    # The first six characters of a run id are read as YYMMDD (the same
    # fields the previous 'MM-DD-YY' string was assembled from). An explicit
    # format avoids the unit-less ``astype(np.datetime64)`` cast, which
    # pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_window = ((fclist_df['date'] > pd.to_datetime(start_date))
                 & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_window, 'runid'].unique())

    # Load only the runs inside the requested window.
    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')
    sample_df = calls[calls['sample_type'] == my_type]

    fig = sf.fix_plot(
        px.scatter(sample_df, x='date', y='median_covid_ratio',
                   color='flags'))
    fig_title = my_type + " control samples per day"
    fig.update_layout(yaxis_type="log", title_text=fig_title)
    return fig
def total_read_bar(fc, metric, label):
    """Draw a bar chart of one pool-level metric for a single flowcell.

    Args:
        fc: Flowcell identifier handed to ``sf.load_covid_data``, or ``None``.
        metric: Column of ``pool_stats.hdr.tsv`` plotted on the y-axis and
            used as the y-axis title.
        label: Not used by this function; kept so existing callers keep
            working.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when ``fc``
        is ``None``.
    """
    if fc is None:
        return {}  # nothing selected yet: hand back an empty figure

    sub_df = sf.load_covid_data([fc], 'pool_stats.hdr.tsv')
    fig = px.bar(sub_df,
                 x='pos_pooling',
                 y=metric,
                 color='pos_pooling',
                 color_discrete_sequence=px.colors.sequential.Agsunset)
    fig = fig.update_layout(yaxis_title=metric)
    return sf.fix_plot(fig)
def data_scatter(fcs, metric):
    """Plot flowcell metrics, either across all data or for chosen flowcells.

    With ``fcs`` set to ``None`` every row from ``sf.compile_fc_metrics()`` is
    drawn as a date/value scatter. Otherwise only rows whose ``Flowcell`` is
    in ``fcs`` are drawn as a grouped bar chart. Both use a log y-axis.

    Args:
        fcs: ``None``, a single flowcell name, or a list-like of flowcell
            names.
        metric: Name matched against the ``Metric`` column, or ``"All"`` to
            keep every metric.

    Returns:
        The figure produced by ``sf.fix_plot``.
    """
    df = sf.compile_fc_metrics()

    if fcs is None:
        sub_df = df  # no selection: show all data
    else:
        # ``Series.isin`` needs a list-like, so wrap a lone flowcell name.
        if isinstance(fcs, str):
            fcs = [fcs]
        sub_df = df[df.Flowcell.isin(fcs)]

    if metric != "All":
        sub_df = sub_df[sub_df.Metric == metric]

    if fcs is None:
        fig = px.scatter(sub_df, x="date", y="Value", color="Metric")
        fig.update_layout(title="all metrics", yaxis_type="log")
    else:
        fig = px.bar(sub_df,
                     x='Metric',
                     y='Value',
                     barmode='group',
                     color='Flowcell')
        fig.update_layout(yaxis_type="log")

    return sf.fix_plot(fig)
def read_type_bar(fc):
    """Draw a stacked bar of read-type percentages per pooling position.

    Every column of ``pool_stats.hdr.tsv`` whose name contains ``percent``,
    except ``percent_usable_reads``, is melted against ``pos_pooling`` and
    plotted as one coloured segment.

    Args:
        fc: Flowcell identifier handed to ``sf.load_covid_data``, or ``None``.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when ``fc``
        is ``None``.
    """
    if fc is None:
        return {}  # nothing selected yet: hand back an empty figure

    stack_colors = [
        '#273c75', '#00a8ff', '#c23616', '#e84118', '#7f8fa6', '#e1b12c',
        '#fbc531'
    ]
    sub_df = sf.load_covid_data([fc], 'pool_stats.hdr.tsv')

    # Filtering (instead of ``list.remove``) means a file that lacks the
    # ``percent_usable_reads`` column no longer raises ``ValueError``.
    percent_cols = [
        name for name in sub_df.columns
        if 'percent' in name and name != 'percent_usable_reads'
    ]
    long_df = sub_df[percent_cols + ['pos_pooling']].melt(
        id_vars='pos_pooling')

    fig = px.bar(long_df,
                 x='pos_pooling',
                 y='value',
                 color='variable',
                 color_discrete_sequence=stack_colors,
                 width=1200)
    fig = fig.update_layout(yaxis_title='Percent of Reads')
    return sf.fix_plot(fig)
def samples_plot(n_clicks, start_date, end_date):
    """Plot positive-control and NTC call ratios over a date window.

    Reads the pickled ``c19_call`` table, keeps rows with a
    ``run_sample_id`` whose run date lies within ``start_date``..``end_date``
    (inclusive), and builds one scatter for ``sample_type == 'Positive'``
    (log y-axis) and one for ``sample_type == 'NTC'``. Each figure gets a
    dotted red line at 0.01, and every point whose ``flags`` is not ``pass``
    is labelled with its ``run_sample_id``.

    NOTE(review): a second ``samples_plot`` (reading
    ``c19_read_counts.pickle``) is defined later in this file; unless both
    are registered through decorators not visible here, the later definition
    replaces this one at module level.

    Args:
        n_clicks: Click counter/flag; nothing is plotted while it is falsy.
        start_date: Inclusive lower date bound (anything ``pd.to_datetime``
            accepts).
        end_date: Inclusive upper date bound.

    Returns:
        Tuple ``(positive, negative)`` of figures produced by
        ``sf.fix_plot``, or ``({}, {})`` when ``n_clicks`` is falsy.
    """
    if not n_clicks:
        return {}, {}

    def _add_threshold_line(fig):
        # Dotted red horizontal line at y = 0.01 across the full plot width.
        fig.add_shape(type='line',
                      xref='paper',
                      yref='y',
                      x0=0,
                      y0=0.01,
                      x1=1,
                      y1=0.01,
                      line=dict(color='Red', width=2, dash='dot'))

    def _label_rows(fig, rows, log_axis):
        # Label each row; labels sharing a date with the previous row are
        # stacked 20px higher so they do not overlap.
        last_date = None
        yshift = 15
        for _, row in rows.iterrows():
            yshift = yshift + 20 if row['date'] == last_date else 15
            ratio = row['median_covid_ratio']
            fig.add_annotation(
                x=row['date'],
                # Annotations on a log axis are positioned in log10 units.
                y=np.log10(ratio) if log_axis else ratio,
                text=row['run_sample_id'],
                showarrow=False,
                yshift=yshift)
            last_date = row['date']

    legend_layout = dict(orientation='h',
                         yanchor='bottom',
                         y=1.02,
                         xanchor='right',
                         x=1,
                         font=dict(size=14))

    samples = pd.read_pickle(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/c19_call.pickle'
    ).dropna(subset=['run_sample_id'])
    # The first six characters of a run id are read as YYMMDD (the same
    # fields the previous 'MM-DD-YY' string was assembled from). An explicit
    # format avoids the unit-less ``astype(np.datetime64)`` cast, which
    # pandas 2.x rejects.
    samples['date'] = pd.to_datetime(samples.runid.str[:6], format='%y%m%d')
    samples = samples[(samples['date'] >= pd.to_datetime(start_date))
                      & (samples['date'] <= pd.to_datetime(end_date))]

    # Work on explicit copies and assign the result back: in-place edits
    # through a filtered slice are not guaranteed to reach the frame.
    positive_df = samples[samples['sample_type'] == 'Positive'].copy()
    positive_df['median_covid_ratio'] = positive_df[
        'median_covid_ratio'].replace({0: 0.0001})
    negative_df = samples[samples['sample_type'] == 'NTC'].copy()

    positive = sf.fix_plot(
        px.scatter(positive_df,
                   x='date',
                   y='median_covid_ratio',
                   color='flags',
                   height=400))
    positive.update_layout(legend=legend_layout)
    y_axis = dict(type="log", tickmode='auto', nticks=5)
    peak = positive_df['median_covid_ratio'].max()
    if pd.notna(peak) and peak > 0:
        # Log-axis ranges are given as log10 of the data values.
        y_axis['range'] = [-4, np.log10(peak * 1.5)]
    positive.update_yaxes(**y_axis)
    _add_threshold_line(positive)
    _label_rows(positive, positive_df[positive_df['flags'] != 'pass'],
                log_axis=True)

    negative = sf.fix_plot(
        px.scatter(negative_df,
                   x='date',
                   y='median_covid_ratio',
                   color='flags',
                   height=400))
    negative.update_traces(marker_size=15)
    negative.update_layout(legend=legend_layout)
    _add_threshold_line(negative)
    negative.update_yaxes(zeroline=True, zerolinecolor='black')
    _label_rows(negative, negative_df[negative_df['flags'] != 'pass'],
                log_axis=False)

    return positive, negative
def samples_plot(n_clicks, start_date, end_date):
    """Plot positive-control and NTC replicate ratios over a date window.

    Reads the pickled ``c19_read_counts`` table, keeps rows with a
    ``run_sample_id`` whose run date lies within ``start_date``..``end_date``
    (inclusive), and builds one scatter of ``covid_ratio`` for
    ``sample_type == 'Positive'`` (log y-axis) and one for
    ``sample_type == 'NTC'``, both coloured by ``replicate_flags`` and both
    carrying a dotted red line at 0.01.

    Args:
        n_clicks: Click counter/flag; nothing is plotted while it is falsy.
        start_date: Inclusive lower date bound (anything ``pd.to_datetime``
            accepts).
        end_date: Inclusive upper date bound.

    Returns:
        Tuple ``(positive, negative)`` of figures produced by
        ``sf.fix_plot``, or ``({}, {})`` when ``n_clicks`` is falsy.
    """
    if not n_clicks:
        return {}, {}

    def _add_threshold_line(fig):
        # Dotted red horizontal line at y = 0.01 across the full plot width.
        fig.add_shape(type='line',
                      xref='paper',
                      yref='y',
                      x0=0,
                      y0=0.01,
                      x1=1,
                      y1=0.01,
                      line=dict(color='Red', width=2, dash='dot'))

    samples = pd.read_pickle(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/c19_read_counts.pickle'
    ).dropna(subset=['run_sample_id'])
    # The first six characters of a run id are read as YYMMDD (the same
    # fields the previous 'MM-DD-YY' string was assembled from). An explicit
    # format avoids the unit-less ``astype(np.datetime64)`` cast, which
    # pandas 2.x rejects.
    samples['date'] = pd.to_datetime(samples.runid.str[:6], format='%y%m%d')
    samples = samples[(samples['date'] >= pd.to_datetime(start_date))
                      & (samples['date'] <= pd.to_datetime(end_date))]

    # Copy before editing and assign the result back: an in-place replace
    # through a filtered slice is not guaranteed to reach the frame.
    positive_df = samples[samples['sample_type'] == 'Positive'].copy()
    positive_df['covid_ratio'] = positive_df['covid_ratio'].replace(
        {0: 0.0001})
    negative_df = samples[samples['sample_type'] == 'NTC']

    positive = sf.fix_plot(
        px.scatter(positive_df,
                   x='date',
                   y='covid_ratio',
                   color='replicate_flags',
                   height=400))
    y_axis = dict(type="log", tickmode='auto', nticks=5)
    peak = positive_df['covid_ratio'].max()
    if pd.notna(peak) and peak > 0:
        # Log-axis ranges are given as log10 of the data values, so the
        # upper bound must use log10 rather than the natural log.
        y_axis['range'] = [-4, np.log10(peak * 1.01)]
    positive.update_yaxes(**y_axis)
    _add_threshold_line(positive)

    negative = sf.fix_plot(
        px.scatter(negative_df,
                   x='date',
                   y='covid_ratio',
                   color='replicate_flags',
                   height=400))
    negative.update_traces(marker_size=15)
    _add_threshold_line(negative)
    negative.update_yaxes(zeroline=True, zerolinecolor='black')

    return positive, negative
def make_pl_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Strip-plot a pool-level metric over time with reference lines.

    Pool statistics from ``sf.compile_data_generic('pool_stats.hdr.tsv')``
    are restricted to runs dated strictly between ``start_date`` and
    ``end_date`` and plotted on a log y-axis. Dotted horizontal lines are
    added for the metric's threshold (when ``thresholds_20200912.csv`` lists
    it) and for the mean and +/-1 and +/-2 standard deviations (when
    ``Mean_Table.csv`` lists it).

    Args:
        fc_click: Click counter/flag; nothing is plotted while it is falsy.
        my_metric: Column to plot; also the key looked up in both CSV tables.
        label: Not used by this function; kept so existing callers keep
            working.
        start_date: Exclusive lower date bound (anything ``pd.to_datetime``
            accepts).
        end_date: Exclusive upper date bound.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when
        ``fc_click`` is falsy.
    """
    if not fc_click:
        return {}

    pl_df = sf.compile_data_generic('pool_stats.hdr.tsv').sort_values(
        ['runid', 'pos_pooling'], ascending=[False, True])
    # The first six characters of a run id are read as YYMMDD (the same
    # fields the previous 'MM-DD-YY' string was assembled from). An explicit
    # format avoids the unit-less ``astype(np.datetime64)`` cast, which
    # pandas 2.x rejects.
    pl_df['date'] = pd.to_datetime(pl_df.runid.str[:6], format='%y%m%d')
    pl_df = pl_df[(pl_df['date'] > pd.to_datetime(start_date))
                  & (pl_df['date'] < pd.to_datetime(end_date))]

    # Threshold and mean/stdev lookup tables.
    metrics = pd.read_csv('thresholds_20200912.csv').set_index("metric")
    mean_table = pd.read_csv('Mean_Table.csv')
    metric_stats = mean_table.loc[mean_table['index'] == my_metric]

    fig = sf.fix_plot(
        px.strip(pl_df, x='date', y=my_metric, hover_name="runid",
                 log_y=True))
    # fig = sf.add_rangeslider(fig)
    fig = fig.update_layout(yaxis_title=my_metric)

    if pl_df.empty:
        # No runs in the window: there is no date span to draw lines over.
        return fig

    # (legend name, y level, line colour), in drawing order.
    reference_lines = []
    if my_metric in metrics.index:
        reference_lines.append(
            ('Threshold', metrics.loc[my_metric, 'threshold'], 'red'))
    if not metric_stats.empty:
        mean1 = metric_stats['mean'].iloc[0]
        stdev1 = metric_stats['std'].iloc[0]
        reference_lines.extend([
            ('Mean', mean1, 'rgb(127, 60, 141)'),
            ('+1 StDev', mean1 + stdev1, 'rgb(17, 165, 121)'),
            ('-1 StDev', mean1 - stdev1, 'rgb(242, 183, 1)'),
            ('+2 StDev', mean1 + (2 * stdev1), 'rgb(128, 186, 90)'),
            ('-2 StDev', mean1 - (2 * stdev1), 'rgb(230, 131, 16)'),
        ])

    # Lines run between the dates of the first and last rows of the frame.
    x_span = [pl_df['date'].iloc[0], pl_df['date'].iloc[-1]]
    for name, level, colour in reference_lines:
        fig.add_traces(
            go.Scatter(x=x_span,
                       y=[level, level],
                       name=name,
                       line=dict(color=colour, width=4, dash='dot')))

    return fig
def make_fc_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Line-plot a flowcell-level metric over time with reference lines.

    Flowcell data from ``sf.compile_data_generic('interop_db.hdr.tsv')`` is
    restricted to runs dated strictly between ``start_date`` and
    ``end_date``. Dotted horizontal lines are added for the metric's minimum
    and maximum thresholds (when defined) and for the mean and +/-1 and +/-2
    standard deviations (when ``Mean_Table.csv`` lists the metric). Runs
    falling outside the thresholds are labelled with the run id from
    character 22 onwards.

    Args:
        fc_click: Click counter/flag; nothing is plotted while it is falsy.
        my_metric: Column to plot; also the key looked up in both CSV tables.
        label: Not used by this function; kept so existing callers keep
            working.
        start_date: Exclusive lower date bound (anything ``pd.to_datetime``
            accepts).
        end_date: Exclusive upper date bound.

    Returns:
        The figure produced by ``sf.fix_plot``, or an empty dict when
        ``fc_click`` is falsy.
    """
    if not fc_click:
        return {}

    # Flowcell data that fills the graph.
    fc_df = sf.compile_data_generic('interop_db.hdr.tsv').sort_values(
        'runid', ascending=False)
    # The first six characters of a run id are read as YYMMDD (the same
    # fields the previous 'MM-DD-YY' string was assembled from). An explicit
    # format avoids the unit-less ``astype(np.datetime64)`` cast, which
    # pandas 2.x rejects.
    fc_df['date'] = pd.to_datetime(fc_df.runid.str[:6], format='%y%m%d')
    fc_df = fc_df[(fc_df['date'] > pd.to_datetime(start_date))
                  & (fc_df['date'] < pd.to_datetime(end_date))]

    # Threshold table: rows 7-14 of the CSV are discarded, the file's
    # threshold becomes the minimum, and only cluster_density gets a maximum.
    metrics = pd.read_csv(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/thresholds_20200912.csv'
    )
    metrics.drop(index=[7, 8, 9, 10, 11, 12, 13, 14], inplace=True)
    metrics.set_index('metric', inplace=True)
    metrics['min_threshold'] = metrics['threshold']
    metrics['max_threshold'] = np.nan
    metrics.loc['cluster_density', 'max_threshold'] = 335000

    has_thresholds = my_metric in metrics.index
    min_threshold = (metrics.loc[my_metric, 'min_threshold']
                     if has_thresholds else np.nan)
    max_threshold = (metrics.loc[my_metric, 'max_threshold']
                     if has_thresholds else np.nan)

    mean_table = pd.read_csv('Mean_Table.csv')
    metric_stats = mean_table.loc[mean_table['index'] == my_metric]

    fig = sf.fix_plot(
        px.line(fc_df, hover_name="runid", x='date', y=my_metric))
    fig = fig.update_layout(yaxis_title=my_metric)

    if fc_df.empty:
        # No runs in the window: there is no date span to draw lines over.
        return fig

    # (legend name, y level, line colour), in drawing order.
    reference_lines = []
    # A NaN threshold is truthy, so test for it explicitly; a zero minimum
    # is still skipped, as before.
    if pd.notna(min_threshold) and min_threshold:
        reference_lines.append(('Minimum Threshold', min_threshold, 'red'))
    if pd.notna(max_threshold):
        reference_lines.append(('Maximum Threshold', max_threshold, 'red'))
    if not metric_stats.empty:
        mean1 = metric_stats['mean'].iloc[0]
        stdev1 = metric_stats['std'].iloc[0]
        neg_stdv2 = mean1 - (2 * stdev1)
        if neg_stdv2 < 0:
            neg_stdv2 = 0  # clamp the lowest band at zero
        reference_lines.extend([
            ('Mean', mean1, 'rgb(127, 60, 141)'),
            ('+1 StDev', mean1 + stdev1, 'rgb(17, 165, 121)'),
            ('-1 StDev', mean1 - stdev1, 'rgb(242, 183, 1)'),
            ('+2 StDev', mean1 + (2 * stdev1), 'rgb(128, 186, 90)'),
            ('-2 StDev', neg_stdv2, 'rgb(230, 131, 16)'),
        ])

    # Lines run between the dates of the first and last rows of the frame.
    x_span = [fc_df['date'].iloc[0], fc_df['date'].iloc[-1]]
    for name, level, colour in reference_lines:
        fig.add_traces(
            go.Scatter(x=x_span,
                       y=[level, level],
                       name=name,
                       line=dict(color=colour, width=4, dash='dot')))

    # Label runs outside the thresholds. Comparisons against a NaN bound are
    # False, so a missing bound never flags a run.
    if has_thresholds:
        values = fc_df[my_metric]
        flagged = fc_df[(values < min_threshold) | (values > max_threshold)]
        for _, row in flagged.iterrows():
            fig.add_annotation(x=row['date'],
                               y=row[my_metric],
                               text='<b>{}</b>'.format(row['runid'][22:]),
                               showarrow=False,
                               yshift=-10)

    return fig