Example #1
0
def fc_dropdown(metric):
    """
    Build the option list for the panel dropdown.

    ``metric`` is accepted but never read. Each entry returned by
    ``sf.fc_list()`` becomes one ``{"label": ..., "value": ...}`` dict in
    which the entry is used as both the label and the value.
    """
    options = []
    for run in sf.fc_list():
        options.append({"label": run, "value": run})
    return options
Example #2
0
def pos_g19ratio(fc_click, my_type, start_date, end_date):
    """
    Scatter the median covid ratio of one sample type against run date.

    Args:
        fc_click: Trigger value; when falsy nothing is loaded and an empty
            dict is returned.
        my_type: Value of the ``sample_type`` column to plot.
        start_date: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound for the run date (exclusive).

    Returns:
        ``{}`` when ``fc_click`` is falsy; otherwise the object returned by
        ``sf.fix_plot`` for the scatter, after a log y axis and a title have
        been set on it.
    """
    if not fc_click:
        return {}

    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = ((fclist_df['date'] > pd.to_datetime(start_date))
                & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    # Only the runs inside the requested window are loaded.
    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')
    sample_df = calls[calls['sample_type'] == my_type]

    fig = sf.fix_plot(
        px.scatter(sample_df,
                   x='date',
                   y='median_covid_ratio',
                   color='flags'))
    fig_title = my_type + " control samples per day"
    fig.update_layout(yaxis_type="log", title_text=fig_title)
    return fig
Example #3
0
def control_counts(fc_click, metric, cont_type, start_date, end_date):
    """
    Bar chart of control samples per day, coloured by pass/fail flag.

    Only controls that share a tube rack with a clinical sample are counted.
    A clinical sample is a row whose ``run_sample_id`` starts with ``G`` or
    ``H`` but not ``Ht``.

    Args:
        fc_click: Trigger value; when falsy nothing is loaded and an empty
            dict is returned.
        metric: Column of the per-day table used for the y axis. The table
            built here has ``count``, ``total`` and ``percent``.
        cont_type: Value of the ``sample_type`` column selecting the control.
        start_date: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound for the run date (exclusive).

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise the plotly bar figure.
    """
    if not fc_click:
        return {}

    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # To keep it fast, only the runs inside the window are downloaded.
    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = (fclist_df['date'] > start) & (fclist_df['date'] < end)
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')
    # Copy the filtered rows so that adding a column below writes to an
    # independent frame instead of a slice of the loaded data.
    calls = calls[(calls['date'] > start) & (calls['date'] < end)].copy()
    # The tube rack is the part of pos_tube_rack before the first ':'.
    calls['tube_rack'] = calls.pos_tube_rack.str.split(":").str[0]

    # Clinical samples: starting October 2020, G or H and not Ht.
    is_g_or_h = calls.run_sample_id.str.startswith(('G', 'H'), na=False)
    is_ht = calls.run_sample_id.str.startswith('Ht', na=False)
    g_samples = calls[is_g_or_h & ~is_ht]

    # Use the tube rack to keep only rows from plates that hold samples,
    # then pick the requested control type.
    g_tubes = g_samples.tube_rack.unique()
    g_calls = calls[calls['tube_rack'].isin(g_tubes)]
    controls = g_calls[g_calls['sample_type'] == cont_type]

    # Per day and flag: number of controls.
    pos_short = (controls[['date', 'flags', 'run_sample_id']]
                 .groupby(by=['date', 'flags'])
                 .count()
                 .reset_index()
                 .rename(columns={'run_sample_id': 'count'}))

    # Per day: number of controls over all flags.
    total_short = (controls[['date', 'run_sample_id']]
                   .groupby(by=['date'])
                   .count()
                   .reset_index()
                   .rename(columns={'run_sample_id': 'total'}))

    pos_short = pos_short.merge(total_short, on='date')
    # 'percent' is stored as a fraction of the daily total, rounded to 3 dp.
    pos_short['percent'] = (pos_short['count'] / pos_short['total']).round(3)

    # make fig
    fig_title = (cont_type + " control samples " + metric +
                 " per day and the pass/fail flags")
    fig = px.bar(pos_short,
                 x="date",
                 y=metric,
                 color='flags',
                 hover_name='percent')
    fig = fig.update_traces(textposition='outside')
    fig = fig.update_layout(title_text=fig_title,
                            yaxis_title="total sample " + metric)
    return fig
Example #4
0
def percent_detect(fc_click, start_date, end_date):
    """
    Line chart of the daily detected and no-call replicate percentages.

    Only clinical samples are used: rows whose ``run_sample_id`` starts with
    ``G`` or ``H`` but not ``Ht``. Replicate counts are summed per run date
    and the detected / no-call sums are divided by the replicate total.

    Args:
        fc_click: Trigger value; when falsy nothing is loaded and an empty
            dict is returned.
        start_date: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound for the run date (exclusive).

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise a plotly figure with one
        trace for each of the two percentages.
    """
    if not fc_click:
        return {}

    # To keep it fast, only the runs inside the window are downloaded.
    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = ((fclist_df['date'] > pd.to_datetime(start_date))
                & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')

    # Clinical samples: starting October 2020, G or H and not Ht.
    is_g_or_h = calls.run_sample_id.str.startswith(('G', 'H'), na=False)
    is_ht = calls.run_sample_id.str.startswith('Ht', na=False)
    g_samples = calls[is_g_or_h & ~is_ht]

    replicate_cols = [
        'date', 'replicates_count', 'replicates_detected',
        'replicates_no_call', 'replicates_not_detected'
    ]
    daily = g_samples[replicate_cols].groupby(by=['date']).sum().reset_index()
    daily['detect_percent'] = (daily['replicates_detected'] /
                               daily['replicates_count'] * 100)
    daily['no_call_percent'] = (daily['replicates_no_call'] /
                                daily['replicates_count'] * 100)

    fig = go.Figure()
    for column, trace_name in (('detect_percent', 'percent detect'),
                               ('no_call_percent', 'percent no call')):
        fig.add_trace(
            go.Scatter(x=daily['date'],
                       y=daily[column],
                       mode='lines+markers',
                       name=trace_name))
    fig = fig.update_layout(
        yaxis_title='percent',
        title_text=
        "Percent of individual clinical samples with Covid detction or no call"
    )
    return fig
Example #5
0
def get_discord_history(start_dt):
    """
    Summarise historical concordance groups from ``start_dt`` until today.

    Runs dated strictly between ``start_dt`` and ``dt.today()`` are loaded,
    passed through ``find_discord`` and reduced to rows whose ``group`` is
    ``concordant``, ``discordant`` or ``one rep``. Those rows are then
    aggregated per (``call``, ``type``, ``group``).

    Args:
        start_dt: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.

    Returns:
        DataFrame with columns ``call``, ``type``, ``group``, ``count``
        (non-null ``Percent of Call`` values per group), ``call_mean``,
        ``all_mean``, ``call_stdv`` and ``all_stdv``, rounded to 4 decimals.
        When no data is available an empty frame with those columns is
        returned.
    """
    keys = ['call', 'type', 'group']
    call_col = 'Percent of Call'
    all_col = 'Percent of All (includes no call)'

    end_date = dt.today()
    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = ((fclist_df['date'] > pd.to_datetime(start_dt))
                & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    # now get just those runs
    discord = load_covid_data(fclist, 'c19_call.hdr.tsv')
    # DataFrame.append no longer exists in pandas 2.x; the discordance table
    # is used directly. Assumes find_discord returns a DataFrame -- TODO
    # confirm.
    if len(discord) > 0:
        historical = find_discord(discord)
    else:
        historical = pd.DataFrame()
    print(historical.shape, "biggest table")

    if historical.empty:
        # Nothing to aggregate: indexing 'group' on a column-less frame would
        # raise KeyError, so hand back an empty result of the usual layout.
        return pd.DataFrame(columns=keys + [
            'count', 'call_mean', 'all_mean', 'call_stdv', 'all_stdv'
        ])

    historical_only3 = historical[historical['group'].isin(
        ['concordant', 'discordant', 'one rep'])]

    historical_count = (historical_only3.groupby(by=keys)
                        .count()
                        .reset_index()[keys + [call_col]]
                        .rename(columns={call_col: 'count'}))
    print(historical_count.shape, "count")

    # Restrict to the two percentage columns so mean/std only see them.
    historical_short = historical_only3[keys + [call_col, all_col]]
    historical_stdv = (historical_short.groupby(by=keys)
                       .std()
                       .reset_index()
                       .rename(columns={
                           call_col: 'call_stdv',
                           all_col: 'all_stdv'
                       }))
    historical_mean = (historical_short.groupby(by=keys)
                       .mean()
                       .reset_index()
                       .rename(columns={
                           call_col: 'call_mean',
                           all_col: 'all_mean'
                       }))

    # The three tables share exactly the key columns, so merging on them is
    # the same join the implicit common-column merge performs.
    history = historical_count.merge(historical_mean, on=keys)
    history = history.merge(historical_stdv, on=keys).round(4)
    return history
Example #6
0
def sample_plot(fc_click, metric, start_date, end_date):
    """
    Bar chart of clinical samples per day, coloured by pass/fail flag.

    A clinical sample is a row whose ``run_sample_id`` starts with ``G`` or
    ``H`` but not ``Ht``.

    Args:
        fc_click: Trigger value; when falsy nothing is loaded and an empty
            dict is returned.
        metric: Column of the per-day table used for the y axis. The table
            built here has ``count``, ``total`` and ``percent``.
        start_date: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound for the run date (exclusive).

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise the plotly bar figure.
    """
    if not fc_click:
        return {}

    # To keep it fast, only the runs inside the window are downloaded.
    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = ((fclist_df['date'] > pd.to_datetime(start_date))
                & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')

    # Clinical samples: starting October 2020, G or H and not Ht.
    is_g_or_h = calls.run_sample_id.str.startswith(('G', 'H'), na=False)
    is_ht = calls.run_sample_id.str.startswith('Ht', na=False)
    g_samples = calls[is_g_or_h & ~is_ht]

    # Per day and flag: number of samples.
    pos_short = (g_samples[['date', 'flags', 'run_sample_id']]
                 .groupby(by=['date', 'flags'])
                 .count()
                 .reset_index()
                 .rename(columns={'run_sample_id': 'count'}))

    # Per day: number of samples over all flags.
    total_short = (g_samples[['date', 'run_sample_id']]
                   .groupby(by=['date'])
                   .count()
                   .reset_index()
                   .rename(columns={'run_sample_id': 'total'}))

    pos_short = pos_short.merge(total_short, on='date')
    # 'percent' is stored as a fraction of the daily total, rounded to 3 dp.
    pos_short['percent'] = (pos_short['count'] / pos_short['total']).round(3)

    # make fig
    fig_title = "Clinical sample " + metric + " per day and the pass/fail flags"
    fig = px.bar(pos_short,
                 x="date",
                 y=metric,
                 color='flags',
                 hover_name='percent')
    fig.update_layout(title_text=fig_title)
    return fig
Example #7
0
def discordance_rate(fc_click, metric, group_list, start_date, end_date):
    """
    Bar chart of how many rows per day fall in each replicate-count group.

    For each of ``replicates_not_detected``, ``replicates_no_call`` and
    ``replicates_detected`` the rows are counted per (date, value). The three
    tables are inner-joined on date and value (renamed ``group``), joined
    with the per-day row total, and turned into fractions of that total.

    Args:
        fc_click: Trigger value; when falsy nothing is loaded and an empty
            dict is returned.
        metric: Column of the merged table used for the y axis, e.g.
            ``percent_detect``, ``percent_no_call``, ``percent_not_detect``
            or one of the ``count_*`` / ``total`` columns.
        group_list: Group labels to keep, compared against the group value
            converted to ``str``.
        start_date: Lower bound for the run date (exclusive); anything
            ``pd.to_datetime`` accepts.
        end_date: Upper bound for the run date (exclusive).

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise the plotly bar figure.
    """
    if not fc_click:
        return {}

    # The first six characters of a run id are read as YYMMDD. Parsing with
    # an explicit format avoids casting an "MM-DD-YY" string to a unit-less
    # np.datetime64, which pandas 2.x rejects.
    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    in_range = ((fclist_df['date'] > pd.to_datetime(start_date))
                & (fclist_df['date'] < pd.to_datetime(end_date)))
    fclist = list(fclist_df.loc[in_range, 'runid'].unique())

    # now get just those runs
    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')

    def _tally(column, count_name):
        # Rows per (date, value of `column`); the value becomes 'group'.
        return (calls[['date', column, 'run_sample_id']]
                .groupby(by=['date', column])
                .count()
                .reset_index()
                .rename(columns={
                    'run_sample_id': count_name,
                    column: 'group'
                }))

    calls_all = (calls[['date', 'run_sample_id']]
                 .groupby(by=['date'])
                 .count()
                 .reset_index()
                 .rename(columns={'run_sample_id': 'total'}))
    calls_repdetect = _tally('replicates_detected', 'count_detects')
    calls_repnocall = _tally('replicates_no_call', 'count_no_call')
    calls_repnotdetect = _tally('replicates_not_detected', 'count_not_detect')

    merge_calls = calls_repnotdetect.merge(calls_repnocall,
                                           on=['date', 'group'])
    merge_calls = merge_calls.merge(calls_repdetect, on=['date', 'group'])
    merge_calls = merge_calls.merge(calls_all, on=['date'])
    for count_name, percent_name in (('count_not_detect',
                                      'percent_not_detect'),
                                     ('count_detects', 'percent_detect'),
                                     ('count_no_call', 'percent_no_call')):
        merge_calls[percent_name] = (merge_calls[count_name] /
                                     merge_calls['total'])

    # Copy the filtered rows so the dtype change below writes to an
    # independent frame rather than a slice of the merged table.
    merge_calls = merge_calls[merge_calls['group'] < 4].copy()
    merge_calls['group'] = merge_calls['group'].astype(str)
    merge_calls = merge_calls[merge_calls.group.isin(group_list)]

    return px.bar(merge_calls, x='date', y=metric, color='group')