def fc_dropdown(metric):
    """Build the option list for the panel dropdown.

    Args:
        metric: Not read by this function; presumably only the trigger
            value that causes the dropdown to be (re)filled.

    Returns:
        list[dict]: One ``{"label": run, "value": run}`` entry for every
        item returned by ``sf.fc_list()``, in the same order.
    """
    # Label and value are the same string for every entry.
    return [{"label": run_id, "value": run_id} for run_id in sf.fc_list()]
def pos_g19ratio(fc_click, my_type, start_date, end_date):
    """Scatter-plot ``median_covid_ratio`` per run date for one sample type.

    Args:
        fc_click: Trigger value. When falsy nothing is loaded and ``{}`` is
            returned.
        my_type: Value of the ``sample_type`` column to keep.
        start_date: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound of the run-date window (exclusive).

    Returns:
        The figure returned by ``sf.fix_plot`` with a log y axis and a title,
        or ``{}`` when ``fc_click`` is falsy or no call data was loaded.
    """
    if not fc_click:
        return {}

    # To keep this fast, only load the runs whose date is inside the window.
    # Run IDs are assumed to begin with the run date as YYMMDD
    # (NOTE(review): confirm against sf.fc_list()).  An explicit format is
    # used instead of a unit-less ``astype(np.datetime64)``, which recent
    # pandas versions reject.
    runs = pd.DataFrame(sf.fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = ((runs['date'] > pd.to_datetime(start_date))
                 & (runs['date'] < pd.to_datetime(end_date)))
    fclist = list(runs.loc[in_window, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    if calls.empty:
        # Nothing to plot; return the same placeholder as the no-click case.
        return {}

    calls['date'] = pd.to_datetime(calls['runid'].str[:6], format='%y%m%d')
    sample_df = calls[calls['sample_type'] == my_type]

    fig = sf.fix_plot(
        px.scatter(sample_df, x='date', y='median_covid_ratio',
                   color='flags'))
    fig_title = my_type + " control samples per day"
    fig.update_layout(yaxis_type="log", title_text=fig_title)
    return fig
def control_counts(fc_click, metric, cont_type, start_date, end_date):
    """Bar-chart control samples per day, split by their ``flags`` value.

    Only controls that share a tube rack with at least one clinical sample
    (``run_sample_id`` starting with G or H, but not Ht) are counted.

    Args:
        fc_click: Trigger value. When falsy nothing is loaded and ``{}`` is
            returned.
        metric: Column of the summary table to plot on the y axis; the
            table has ``count``, ``total`` and ``percent`` columns.
        cont_type: Value of the ``sample_type`` column to keep.
        start_date: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound of the run-date window (exclusive).

    Returns:
        A plotly express bar figure, or ``{}`` when ``fc_click`` is falsy or
        no call data was loaded.
    """
    if not fc_click:
        return {}

    # To keep this fast, only load the runs whose date is inside the window.
    # Run IDs are assumed to begin with the run date as YYMMDD
    # (NOTE(review): confirm against sf.fc_list()).  An explicit format is
    # used instead of a unit-less ``astype(np.datetime64)``, which recent
    # pandas versions reject.
    lower = pd.to_datetime(start_date)
    upper = pd.to_datetime(end_date)
    runs = pd.DataFrame(sf.fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = (runs['date'] > lower) & (runs['date'] < upper)
    fclist = list(runs.loc[in_window, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    if calls.empty:
        # Nothing to plot; return the same placeholder as the no-click case.
        return {}

    calls['date'] = pd.to_datetime(calls['runid'].str[:6], format='%y%m%d')
    # Copy the filtered rows so the tube_rack column below is written to an
    # independent frame rather than to a slice of the loaded data.
    calls = calls[(calls['date'] > lower) & (calls['date'] < upper)].copy()

    # The tube rack is the part of pos_tube_rack before the first ':'.
    calls['tube_rack'] = calls['pos_tube_rack'].str.split(":").str[0]

    # Clinical samples: starting October 2020, G or H and not Ht.
    sample_ids = calls['run_sample_id'].str
    is_clinical = (sample_ids.startswith(('G', 'H'), na=False)
                   & ~sample_ids.startswith('Ht', na=False))

    # Use the tube rack to keep only rows from racks that hold clinical
    # samples, then pick the requested control type from those racks.
    g_tubes = calls.loc[is_clinical, 'tube_rack'].unique()
    g_calls = calls[calls['tube_rack'].isin(g_tubes)]
    controls = g_calls[g_calls['sample_type'] == cont_type]

    # Samples per (date, flags) and per date, joined so each row can be
    # expressed as a fraction of that day's total.
    per_flag = (controls.groupby(['date', 'flags'])['run_sample_id']
                .count().reset_index(name='count'))
    per_day = (controls.groupby('date')['run_sample_id']
               .count().reset_index(name='total'))
    pos_short = per_flag.merge(per_day, on='date')
    pos_short['percent'] = (pos_short['count'] / pos_short['total']).round(3)

    fig_title = (cont_type + " control samples " + metric
                 + " per day and the pass/fail flags")
    fig = px.bar(pos_short, x="date", y=metric, color='flags',
                 hover_name='percent')
    fig = fig.update_traces(textposition='outside')
    fig = fig.update_layout(title_text=fig_title,
                            yaxis_title="total sample " + metric)
    return fig
def percent_detect(fc_click, start_date, end_date):
    """Plot the daily percentage of detected and no-call replicates.

    Only clinical samples (``run_sample_id`` starting with G or H, but not
    Ht) are included.  Replicate counts are summed per run date and the
    detected / no-call sums are expressed as a percentage of
    ``replicates_count``.

    Args:
        fc_click: Trigger value. When falsy nothing is loaded and ``{}`` is
            returned.
        start_date: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound of the run-date window (exclusive).

    Returns:
        A ``go.Figure`` with two lines+markers traces, or ``{}`` when
        ``fc_click`` is falsy or no call data was loaded.
    """
    if not fc_click:
        return {}

    # To keep this fast, only load the runs whose date is inside the window.
    # Run IDs are assumed to begin with the run date as YYMMDD
    # (NOTE(review): confirm against sf.fc_list()).  An explicit format is
    # used instead of a unit-less ``astype(np.datetime64)``, which recent
    # pandas versions reject.
    runs = pd.DataFrame(sf.fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = ((runs['date'] > pd.to_datetime(start_date))
                 & (runs['date'] < pd.to_datetime(end_date)))
    fclist = list(runs.loc[in_window, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    if calls.empty:
        # Nothing to plot; return the same placeholder as the no-click case.
        return {}

    calls['date'] = pd.to_datetime(calls['runid'].str[:6], format='%y%m%d')

    # Clinical samples: starting October 2020, G or H and not Ht.
    sample_ids = calls['run_sample_id'].str
    is_clinical = (sample_ids.startswith(('G', 'H'), na=False)
                   & ~sample_ids.startswith('Ht', na=False))
    g_short = calls.loc[is_clinical, [
        'date',
        'replicates_count',
        'replicates_detected',
        'replicates_no_call',
        'replicates_not_detected',
    ]]

    # Sum the replicate columns per day, then convert to percentages.
    daily = g_short.groupby('date').sum().reset_index()
    daily['detect_percent'] = (
        daily['replicates_detected'] / daily['replicates_count'] * 100)
    daily['no_call_percent'] = (
        daily['replicates_no_call'] / daily['replicates_count'] * 100)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=daily['date'],
                   y=daily['detect_percent'],
                   mode='lines+markers',
                   name='percent detect'))
    fig.add_trace(
        go.Scatter(x=daily['date'],
                   y=daily['no_call_percent'],
                   mode='lines+markers',
                   name='percent no call'))
    fig = fig.update_layout(
        yaxis_title='percent',
        title_text=("Percent of individual clinical samples with Covid "
                    "detection or no call"))
    return fig
def get_discord_history(start_dt):
    """Summarise the discordance tables of all runs since ``start_dt``.

    Runs dated strictly between ``start_dt`` and today are loaded, passed
    through ``find_discord``, restricted to the groups ``concordant``,
    ``discordant`` and ``one rep``, and aggregated per
    (``call``, ``type``, ``group``).

    Args:
        start_dt: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.

    Returns:
        pandas.DataFrame: Columns ``call``, ``type``, ``group``, ``count``
        (non-null ``Percent of Call`` values), ``call_mean``, ``all_mean``,
        ``call_stdv`` and ``all_stdv``, rounded to 4 decimals.  The frame
        is empty (but has these columns) when no call data was loaded.
    """
    key_cols = ['call', 'type', 'group']
    call_col = 'Percent of Call'
    all_col = 'Percent of All (includes no call)'

    # Only load the runs whose date is inside the window.  Run IDs are
    # assumed to begin with the run date as YYMMDD (NOTE(review): confirm
    # against fc_list()).  An explicit format is used instead of a
    # unit-less ``astype(np.datetime64)``, which recent pandas versions
    # reject.
    runs = pd.DataFrame(fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = ((runs['date'] > pd.to_datetime(start_dt))
                 & (runs['date'] < pd.to_datetime(dt.today())))
    fclist = list(runs.loc[in_window, 'runid'].unique())

    discord = load_covid_data(fclist, 'c19_call.hdr.tsv')
    if len(discord) == 0:
        # Without data there is no 'group' column to filter on; hand back
        # an empty table with the usual columns instead of raising.
        return pd.DataFrame(columns=key_cols + [
            'count', 'call_mean', 'all_mean', 'call_stdv', 'all_stdv'
        ])

    historical = find_discord(discord)
    print(historical.shape, "biggest table")

    historical_only3 = historical[historical['group'].isin(
        ['concordant', 'discordant', 'one rep'])]

    # One pass over the groups replaces three separate groupbys that were
    # merged back together on the same keys.
    history = historical_only3.groupby(key_cols).agg(
        count=(call_col, 'count'),
        call_mean=(call_col, 'mean'),
        all_mean=(all_col, 'mean'),
        call_stdv=(call_col, 'std'),
        all_stdv=(all_col, 'std'),
    ).reset_index()
    print(history[key_cols + ['count']].shape, "count")

    return history.round(4)
def sample_plot(fc_click, metric, start_date, end_date):
    """Bar-chart clinical samples per day, split by their ``flags`` value.

    Clinical samples are rows whose ``run_sample_id`` starts with G or H,
    but not Ht.

    Args:
        fc_click: Trigger value. When falsy nothing is loaded and ``{}`` is
            returned.
        metric: Column of the summary table to plot on the y axis; the
            table has ``count``, ``total`` and ``percent`` columns.
        start_date: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound of the run-date window (exclusive).

    Returns:
        A plotly express bar figure, or ``{}`` when ``fc_click`` is falsy or
        no call data was loaded.
    """
    if not fc_click:
        return {}

    # To keep this fast, only load the runs whose date is inside the window.
    # Run IDs are assumed to begin with the run date as YYMMDD
    # (NOTE(review): confirm against sf.fc_list()).  An explicit format is
    # used instead of a unit-less ``astype(np.datetime64)``, which recent
    # pandas versions reject.
    runs = pd.DataFrame(sf.fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = ((runs['date'] > pd.to_datetime(start_date))
                 & (runs['date'] < pd.to_datetime(end_date)))
    fclist = list(runs.loc[in_window, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    if calls.empty:
        # Nothing to plot; return the same placeholder as the no-click case.
        return {}

    calls['date'] = pd.to_datetime(calls['runid'].str[:6], format='%y%m%d')

    # Clinical samples: starting October 2020, G or H and not Ht.
    sample_ids = calls['run_sample_id'].str
    is_clinical = (sample_ids.startswith(('G', 'H'), na=False)
                   & ~sample_ids.startswith('Ht', na=False))
    g_samples = calls[is_clinical]

    # Samples per (date, flags) and per date, joined so each row can be
    # expressed as a fraction of that day's total.
    per_flag = (g_samples.groupby(['date', 'flags'])['run_sample_id']
                .count().reset_index(name='count'))
    per_day = (g_samples.groupby('date')['run_sample_id']
               .count().reset_index(name='total'))
    pos_short = per_flag.merge(per_day, on='date')
    pos_short['percent'] = (pos_short['count'] / pos_short['total']).round(3)

    fig_title = ("Clinical sample " + metric
                 + " per day and the pass/fail flags")
    fig = px.bar(pos_short, x="date", y=metric, color='flags',
                 hover_name='percent')
    fig.update_layout(title_text=fig_title)
    return fig
def discordance_rate(fc_click, metric, group_list, start_date, end_date):
    """Bar-chart, per day, how many samples had each replicate count.

    For each run date the rows are tallied three times: by
    ``replicates_not_detected``, by ``replicates_no_call`` and by
    ``replicates_detected``.  The replicate count becomes ``group`` and the
    tallies are joined on (``date``, ``group``), then divided by the number
    of rows for that date.

    Args:
        fc_click: Trigger value. When falsy nothing is loaded and ``{}`` is
            returned.
        metric: Column of the merged table to plot on the y axis, e.g.
            ``percent_detect``, ``percent_no_call``, ``percent_not_detect``
            or one of the ``count_*`` / ``total`` columns.
        group_list: Group labels to keep, compared against ``group`` after
            it has been converted to ``str``.
        start_date: Lower bound of the run-date window (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound of the run-date window (exclusive).

    Returns:
        A plotly express bar figure coloured by ``group``, or ``{}`` when
        ``fc_click`` is falsy or no call data was loaded.
    """
    if not fc_click:
        return {}

    # To keep this fast, only load the runs whose date is inside the window.
    # Run IDs are assumed to begin with the run date as YYMMDD
    # (NOTE(review): confirm against sf.fc_list()).  An explicit format is
    # used instead of a unit-less ``astype(np.datetime64)``, which recent
    # pandas versions reject.
    runs = pd.DataFrame(sf.fc_list(), columns=['runid'])
    runs['date'] = pd.to_datetime(runs['runid'].str[:6], format='%y%m%d')
    in_window = ((runs['date'] > pd.to_datetime(start_date))
                 & (runs['date'] < pd.to_datetime(end_date)))
    fclist = list(runs.loc[in_window, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    if calls.empty:
        # Nothing to plot; return the same placeholder as the no-click case.
        return {}

    calls['date'] = pd.to_datetime(calls['runid'].str[:6], format='%y%m%d')

    def tally(column, count_name):
        # Rows per (date, value of `column`), with the value exposed as
        # 'group' so the three tallies can be joined on it.
        return (calls.groupby(['date', column])['run_sample_id']
                .count()
                .reset_index(name=count_name)
                .rename(columns={column: 'group'}))

    calls_all = (calls.groupby('date')['run_sample_id']
                 .count().reset_index(name='total'))
    calls_repdetect = tally('replicates_detected', 'count_detects')
    calls_repnocall = tally('replicates_no_call', 'count_no_call')
    calls_repnotdetect = tally('replicates_not_detected', 'count_not_detect')

    # NOTE(review): these are inner joins, so a (date, group) pair is kept
    # only if that replicate count occurs in all three tallies on that day.
    # Confirm that dropping the other pairs is intended.
    merge_calls = calls_repnotdetect.merge(calls_repnocall,
                                           on=['date', 'group'])
    merge_calls = merge_calls.merge(calls_repdetect, on=['date', 'group'])
    merge_calls = merge_calls.merge(calls_all, on=['date'])

    merge_calls['percent_not_detect'] = (
        merge_calls['count_not_detect'] / merge_calls['total'])
    merge_calls['percent_detect'] = (
        merge_calls['count_detects'] / merge_calls['total'])
    merge_calls['percent_no_call'] = (
        merge_calls['count_no_call'] / merge_calls['total'])

    # Copy the filtered rows so the 'group' column below is rewritten on an
    # independent frame rather than on a slice of the merged table.
    merge_calls = merge_calls[merge_calls['group'] < 4].copy()
    merge_calls['group'] = merge_calls['group'].astype(str)
    merge_calls = merge_calls[merge_calls['group'].isin(group_list)]

    fig = px.bar(merge_calls, x='date', y=metric, color='group')
    return fig