Example 1
def test_no_wells():
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "empty.toml")

    # The following examples actually trigger a different (and more specific)
    # exception, but it still has "No wells defined" in the message.
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "row_without_col.toml")
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "irow_without_col.toml")
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "col_without_row.toml")
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "icol_without_row.toml")
Example 2
def main():
    import docopt

    try:
        args = docopt.docopt(__doc__)
        toml_path = Path(args['<toml>'])

        df = bio96.load(toml_path)
        cmap = colorcet.cm.get(args['--color'], plt.get_cmap(args['--color']))

        # Unless told to stay in the foreground or to write straight to a
        # file, fork so the plot window doesn't tie up the shell.
        if not args['--foreground'] and not args['--output']:
            if os.fork() != 0:
                sys.exit()

        fig = plot_layout(df, args['<attr>'], cmap=cmap)

        if args['--output']:
            out_path = args['--output'].replace('$', toml_path.stem)
            fig.savefig(out_path)
            print("Layout written to:", out_path)
        else:
            title = str(toml_path)
            if args['<attr>']: title += f' [{", ".join(args["<attr>"])}]'
            fig.canvas.set_window_title(title)
            plt.show()

    except CliError as err:
        print(err)
    except ConfigError as err:
        err.toml_path = toml_path
        print(err)
Example 3
def test_two_plates():
    df = bio96.load(
        DIR / 'two_plates.toml',
        data_loader=pd.read_csv,
        merge_cols={'well': 'Well'},
    )
    assert row(df, 'plate == "a"') == dict(
        path=DIR / 'two_plates_a.csv',
        plate='a',
        well='A1',
        Well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        Data=0,
    )
    assert row(df, 'plate == "b"') == dict(
        path=DIR / 'two_plates_b.csv',
        plate='b',
        well='A1',
        Well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=2,
        Data=1,
    )
Example 4
def load_cq(toml_path):
    def parse_csv(toml_path):
        df = pd.read_csv(toml_path / 'Quantification Cq Results.csv')
        df = df.rename({'Well': 'well0', 'Cq': 'cq'}, axis='columns')
        return df[['well0', 'cq']]

    # `path_guess` points each plate at a directory named after the TOML
    # file, and the Cq values from that directory's CSV export are merged
    # with the layout on the zero-padded well name (e.g. 'A01').
    return bio96.load(
        toml_path,
        parse_csv,
        merge_cols={'well0': 'well0'},
        path_guess='{0.stem}/',
    )
Example 5
def test_one_plate():
    labels = bio96.load(DIR / 'one_plate.toml')
    assert row(labels, 'well == "A1"') == dict(
        path=DIR / 'one_plate.csv',
        plate='a',
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
    )
Example 6
def load_melt_data(toml_path):
    def melt_from_xlsx(xlsx_path):
        return pd.read_excel(xlsx_path,
                             sheet_name='Melt Curve Raw Data',
                             header=43)

    df, opt = bio96.load(
        toml_path,
        melt_from_xlsx,
        {'well': 'Well Position'},
    )
    df = drop_outliers(df)
    return df
Example 7
def load_pcr_data(toml_path):
    def pcr_from_xlsx(xlsx_path):
        return pd.read_excel(xlsx_path,
                             sheet_name='Amplification Data',
                             header=43)

    df, opt = bio96.load(
        toml_path,
        pcr_from_xlsx,
        {'well': 'Well Position'},
    )
    df = drop_outliers(df)
    return df
Example 8
def load_ct_data(toml_path):
    def ct_from_xlsx(xlsx_path):
        ct = pd.read_excel(xlsx_path, sheet_name='Results', header=43)
        ct = ct.dropna(thresh=3)
        ct = ct.dropna(axis='columns', how='all')
        return ct

    df, opt = bio96.load(
        toml_path,
        ct_from_xlsx,
        {'well': 'Well Position'},
    )
    df = drop_outliers(df)
    return df
Example 9
def load_data(toml_path):
    def load_biotek(path):
        expt = plate_reader.BiotekExperiment(path)
        return expt.kinetic[600]

    df, options = bio96.load(
        toml_path,
        load_biotek,
        {'well': 'well'},
        path_guess='{0.stem}.xlsx',
    )

    print(f"{df.read.isna().sum()}/{len(df)} data points discarded.")
    return df.dropna(subset=['read'])
Example 10
def load(toml_path, query=None, aggregate=None, genes=None):
    toml_path = Path(toml_path)

    def biorad(path):
        df = pd.read_csv(path / 'Quantification Cq Results.csv')
        df = df.rename({'Well': 'well0', 'Cq': 'cq'}, axis='columns')
        return df[['well0', 'cq']]

    def applied_biosystems(path):
        df = pd.read_excel(path, sheet_name='Results', header=43)
        df = df.dropna(thresh=3)
        df = df.rename({'Well Position': 'well', 'CT': 'cq'}, axis='columns')
        return df[['well', 'cq']]

    # Pick the parser and merge columns based on which instrument vendor is
    # mentioned in the TOML file.
    with toml_path.open() as f:
        contents = f.read().lower()

    if "applied biosystems" in contents:
        load_unlabeled_cq = applied_biosystems
        merge_cols = {'well': 'well'}
    else:  # biorad
        load_unlabeled_cq = biorad
        merge_cols = {'well0': 'well0'}

    df, options = bio96.load(
        toml_path,
        load_unlabeled_cq,
        merge_cols,
        path_guess='{0.stem}/',
    )

    if query:
        n0 = len(df)
        df = df.query(query)
        print(f"Kept {len(df)}/{n0} observations where {query}")

    if aggregate:
        df = calc_cq(df, aggregate)

    if genes:
        df = calc_Δcq(df, genes)

    return df, options
Example 11
def test_concat():
    labels = bio96.load(DIR / 'one_concat.toml')
    assert len(labels) == 2

    with raises(ConfigError, match="Did you mean to set `meta.path`?"):
        bio96.load(DIR / 'one_concat.toml', path_required=True)

    labels = bio96.load(DIR / 'two_concats.toml')
    assert len(labels) == 3

    with raises(ConfigError, match="Did you mean to set `meta.path`?"):
        bio96.load(DIR / 'two_concats.toml', path_required=True)

    # Should not raise.  It's ok that `just_concat.csv` doesn't exist, because
    # `just_concat.toml` doesn't specify any wells.
    labels = bio96.load(
        DIR / 'just_concat.toml',
        path_guess='{0.stem}.csv',
        path_required=True,
    )
    assert len(labels) == 1
Example 12
def test_bad_args():

    # Doesn't make sense to specify `merge_cols` without `data_loader`:
    with raises(ValueError):
        bio96.load(DIR / 'two_plates.toml', merge_cols={})

    # Non-existent merge columns.
    with raises(ValueError, match='xxx'):
        bio96.load(
            DIR / 'two_plates.toml',
            data_loader=pd.read_csv,
            merge_cols={'xxx': 'Well'},
        )

    with raises(ValueError, match='xxx'):
        bio96.load(
            DIR / 'two_plates.toml',
            data_loader=pd.read_csv,
            merge_cols={'well': 'xxx'},
        )
Example 13
import bio96
import docopt
import pandas as pd

args = docopt.docopt(__doc__)


def df_from_path(path):
    """
   Load experimental data from the given path into a data frame.  Also make 
   sure that data frame has the column(s) referenced by the `merge_cols` 
   argument to `bio96.load()`, which in this case is "Well".

   This function will generally be different for every type of data you 
   work with.  Many instruments can export data in the ``*.xlsx`` format, 
   which can be easily loaded into a data frame using ``pd.read_excel()``.  
   For other file formats, you may be able to find a library to parse them, 
   or you may have to parse them yourself.
   """
    return pd.read_excel(path)
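
# If your instrument exports CSV rather than Excel, the loader just needs to
# return a data frame containing the column named by `merge_cols` ("Well"
# here).  A minimal sketch, assuming a hypothetical export with a column
# called "Well Position":
def df_from_csv(path):
    df = pd.read_csv(path)
    return df.rename(columns={'Well Position': 'Well'})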


df = bio96.load(args['<toml>'], df_from_path, {'well': 'Well'})

# The data frame loaded above will have rows for each well, columns for each
# field in the TOML file, and more columns for each kind of data found in
# the paths referenced by (or inferred from) the TOML file.  There are lots
# of ways to work with the data, but the ``pd.DataFrame.groupby()`` method
# (useful for selecting subsets of the data based on one or more attributes)
# is good to know about.

print(df)
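
# For instance, grouping on one of the attributes defined in the TOML file
# lets you work with each experimental condition separately.  The column
# name 'x' below is only a hypothetical attribute; substitute whichever
# fields your layout actually defines.
for x, group in df.groupby('x'):
    print(f"{len(group)} wells with x = {x}")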
Example 14
def test_reasonably_complex():
    df = bio96.load(DIR / 'reasonably_complex.toml')
    assert len(df) == 32
Example 15
def test_one_well():
    labels = bio96.load(DIR / 'one_well_xy.toml')
    assert row(labels, 'well == "A1"') == dict(
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
    )

    labels = bio96.load(DIR / 'one_well_xy.toml', path_guess='{0.stem}.csv')
    assert row(labels, 'well == "A1"') == dict(
        path=DIR / 'one_well_xy.csv',
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
    )

    with raises(ConfigError, match='one_well_xy.toml'):
        bio96.load(DIR / 'one_well_xy.toml', path_required=True)
    with raises(ConfigError, match='one_well_xy.toml'):
        bio96.load(DIR / 'one_well_xy.toml', data_loader=pd.read_csv)

    labels, data = bio96.load(
        DIR / 'one_well_xy.toml',
        data_loader=pd.read_csv,
        path_guess='{0.stem}.csv',
    )
    assert row(labels, 'well == "A1"') == dict(
        path=DIR / 'one_well_xy.csv',
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
    )
    assert row(data, 'Well == "A1"') == dict(
        Well='A1',
        path=DIR / 'one_well_xy.csv',
        Data='xy',
    )

    df = bio96.load(
        DIR / 'one_well_xy.toml',
        data_loader=pd.read_csv,
        merge_cols={'well': 'Well'},
        path_guess='{0.stem}.csv',
    )
    assert row(df, 'well == "A1"') == dict(
        path=DIR / 'one_well_xy.csv',
        well='A1',
        Well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
        Data='xy',
    )

    df = bio96.load(
        DIR / 'one_well_xy.toml',
        data_loader=read_csv_and_rename,
        merge_cols=True,
        path_guess='{0.stem}.csv',
    )
    assert row(df, 'well == "A1"') == dict(
        path=DIR / 'one_well_xy.csv',
        well='A1',
        Well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
        Data='xy',
    )
Example 16
def test_one_well_with_extras(extras_arg, expected):
    labels, extras = bio96.load(
        DIR / 'one_well_xy_extras.toml',
        extras=extras_arg,
    )
    assert extras == expected
    assert row(labels, 'well == "A1"') == dict(
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
    )

    labels, data, extras = bio96.load(
        DIR / 'one_well_xy_extras.toml',
        data_loader=pd.read_csv,
        path_guess='{0.stem}.csv',
        extras=extras_arg,
    )
    assert extras == expected
    assert row(labels, 'well == "A1"') == dict(
        path=DIR / 'one_well_xy_extras.csv',
        well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
    )
    assert row(data, 'Well == "A1"') == dict(
        Well='A1',
        path=DIR / 'one_well_xy_extras.csv',
        Data='xy',
    )

    # Merged data
    a1_expected = dict(
        path=DIR / 'one_well_xy_extras.csv',
        well='A1',
        Well='A1',
        well0='A01',
        row='A',
        col='1',
        row_i=0,
        col_j=0,
        x=1,
        y=1,
        Data='xy',
    )

    df, extras = bio96.load(
        DIR / 'one_well_xy_extras.toml',
        data_loader=pd.read_csv,
        merge_cols={'well': 'Well'},
        path_guess='{0.stem}.csv',
        extras=extras_arg,
    )
    assert extras == expected
    assert row(df, 'well == "A1"') == a1_expected

    df, extras = bio96.load(
        DIR / 'one_well_xy_extras.toml',
        data_loader=read_csv_and_rename,
        merge_cols=True,
        path_guess='{0.stem}.csv',
        extras=extras_arg,
    )
    assert extras == expected
    assert row(df, 'well == "A1"') == a1_expected
Example 17
def get_data():
    if os.path.isdir(args['<bio96_metadata>'][0]):
        dataframes = []
        for f in os.listdir(args['<bio96_metadata>'][0]):
            if f.endswith('.data'):
                subdata = bio96.load(
                        os.path.join(args['<bio96_metadata>'][0], f),
                        load_dataframe, {'well': 'variable'})
                #print(subdata)
                dataframes.append(subdata)
        data = pd.concat(dataframes, ignore_index=True)
    else:
        dataframes = []
        for f in args['<bio96_metadata>']:
            subdata = bio96.load(f, load_dataframe, {'well': 'variable'})
            dataframes.append(subdata)
        data = pd.concat(dataframes, ignore_index=True)

    data = data.dropna()
    raw = args['--raw']

    # Subtract the no-substrate well
    if args['--zero']:
        import time
        start_time = time.time()
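        # Build a nested lookup of the zero-substrate controls, keyed by
        # enzyme, date, and replicate, so each row below can find its
        # matching control without re-filtering the whole data frame.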
        zero_df = data[data['conc_uM'] == 0]
        zgroups = zero_df.groupby(['enzyme', 'date', 'replicate'])
        groupdict = {}
        for name, group in zgroups:
            if name[0] not in groupdict:
                groupdict[name[0]] = {}
            if name[1] not in groupdict[name[0]]:
                groupdict[name[0]][name[1]] = {}
            groupdict[name[0]][name[1]][name[2]] = group
        for index, row in data.iterrows():
            enzyme = row['enzyme']
            date = row['date']
            replicate = row['replicate']
            #print(replicate)
            conc = row['conc_uM']
            timept = row['Time']
            zdf = groupdict[enzyme][date][replicate]
            zero = zdf.loc[zdf['Time'] == timept]['value'].tolist()[0]
            """
            zero = zero_df.loc[(zero_df['enzyme']==enzyme) & \
                    (zero_df['date']==date) & \
                    (zero_df['replicate']==replicate) & \
                    (zero_df['conc_uM']==conc) & \
                    (zero_df['Time']==time),
                    ['conc_uM']]
            """
            data.at[index, 'value'] = row['value'] - zero
    #print('time to load:', time.time() - start_time)

    if raw:
        data['product'] = data['value']
    else:
        data['product'] = data['units'] * data['value'] / data[
            'conversion_factor']

    if args['--correction']:
        correction = float(args['--correction'])
        data['product'] = data['product'] * correction

    if 'enzyme_conc' in data and not raw:
        data['persecond'] = data['product'] / data['enzyme_conc']
    else:
        data['persecond'] = data['product']
    if 'enzyme_conc' not in data:
        data['enzyme_conc'] = 1
    data['clicked_linear'] = False
    data['clicked_kinetics'] = False
    data['slope'] = 1
    data['min_time'] = min(data['Time'])
    data['max_time'] = max(data['Time'])
    data['shown'] = True

    return data
Example 18
def parse_labels():
    df = bio96.load('20181002_sgrna_qpcr.toml')
    df = df.set_index('well')
    return df
Example 19
import bio96
import pandas as pd

#load the file containing the location of primer combinations in different plates:
plate_primer_well = pd.read_csv(snakemake.params[0])

#load the annotation of your experiment:
annotation_df = bio96.load(snakemake.input[0])
annotation_df = annotation_df[['primer_plate', 'well', 'plate', 'cell_type']]

#combine plate with well information to find the correct primer_combination
annotation_df['plate_well'] = (
    annotation_df['primer_plate'] + "_" + annotation_df['well'])

#merge dfs to extract primer combi info from plate_primer_well
ann_primers = pd.merge(annotation_df, plate_primer_well, on='plate_well')
ann_primers['sample_name'] = ann_primers['plate'] + ann_primers['primer_combi']
ann_primers['well'] = ann_primers['well']

ann_file = ann_primers[['sample_name', 'cell_type', 'well']]
ann_file.to_csv(snakemake.output[0], index=False)